Example #1
0
 def set_FIMO_thresh(self, FIMO_thresh):
     """Validate and store the FIMO threshold.

     The value is accepted when ``float(FIMO_thresh)`` succeeds and the
     result lies in the closed interval [0, 1]. It is stored on
     ``self.FIMO_thresh`` exactly as received (no conversion is applied).

     Args:
         FIMO_thresh: threshold value as supplied by the caller.

     Raises:
         exceptions.WrongArgumentError: if the value cannot be converted to
             a float or falls outside [0, 1].
     """
     # Only conversion failures are treated as "invalid input"; anything
     # else (e.g. KeyboardInterrupt) must propagate untouched.
     try:
         is_valid = 0 <= float(FIMO_thresh) <= 1
     except (TypeError, ValueError, OverflowError):
         is_valid = False
     if not is_valid:
         message = (
             "Error: the following value for FIMO threshold <{FIMO_thresh}> "
             "is not correct").format(FIMO_thresh=FIMO_thresh)
         raise exceptions.WrongArgumentError(message)
     self.FIMO_thresh = FIMO_thresh
Example #2
0
 def set_num_threads(self, num_threads):
     """Sets number of threads according to user input.

     The value is accepted when ``int(num_threads)`` succeeds and the result
     is strictly positive. It is stored on ``self.num_threads`` exactly as
     received (no conversion is applied).

     Args:
         num_threads: number of threads as supplied by the caller.

     Raises:
         exceptions.WrongArgumentError: if the value cannot be converted to
             an int or is not greater than zero.
     """
     # Only conversion failures are treated as "invalid input"; anything
     # else (e.g. KeyboardInterrupt) must propagate untouched.
     try:
         is_valid = int(num_threads) > 0
     except (TypeError, ValueError, OverflowError):
         is_valid = False
     if not is_valid:
         message = (
             "Error: the following value for number of threads <{num_threads}> "
             "is not correct").format(num_threads=num_threads)
         raise exceptions.WrongArgumentError(message)
     self.num_threads = num_threads
Example #3
0
 def set_RNAseq_min_length(self, RNAseq_min_length):
     """Sets minimum length for RNAseq sequences according to user input.

     The value is accepted when ``int(RNAseq_min_length)`` succeeds and the
     result is strictly positive. It is stored on
     ``self.RNAseq_min_length`` exactly as received (no conversion is
     applied).

     Args:
         RNAseq_min_length: minimum length as supplied by the caller.

     Raises:
         exceptions.WrongArgumentError: if the value cannot be converted to
             an int or is not greater than zero.
     """
     # Only conversion failures are treated as "invalid input"; anything
     # else (e.g. KeyboardInterrupt) must propagate untouched.
     try:
         is_valid = int(RNAseq_min_length) > 0
     except (TypeError, ValueError, OverflowError):
         is_valid = False
     if not is_valid:
         message = (
             "Error: the following value for minimum RNAseq length <{min_len}> "
             "is not correct").format(min_len=RNAseq_min_length)
         raise exceptions.WrongArgumentError(message)
     self.RNAseq_min_length = RNAseq_min_length
Example #4
0
 def set_AME_scoring(self, AME_scoring):
     """Store the AME scoring method after checking it is "max" or "avg".

     Raises:
         exceptions.WrongArgumentError: for any other value.
     """
     # Guard clause: reject unknown methods before touching the instance.
     if AME_scoring not in ("max", "avg"):
         template = ("Error: the following AME scoring method <{AME_scoring}> "
                     "is not correct")
         raise exceptions.WrongArgumentError(
             template.format(AME_scoring=AME_scoring))
     self.AME_scoring = AME_scoring
Example #5
0
 def set_rand_sv_ratio(self, rand_sv_ratio):
     """Convert the random-to-SV sequences ratio to int and store it.

     Args:
         rand_sv_ratio: ratio as supplied by the caller; anything accepted
             by ``int()``.

     Raises:
         exceptions.WrongArgumentError: if the value cannot be converted to
             an int.
     """
     try:
         ratio = int(rand_sv_ratio)
     except (TypeError, ValueError, OverflowError):
         # The placeholder must match the keyword passed to format();
         # a mismatch makes format() raise KeyError and hides the real error.
         message = (
             "Error: the following random to SV sequences ratio <{rand_sv_ratio}> "
             "is not correct").format(rand_sv_ratio=rand_sv_ratio)
         raise exceptions.WrongArgumentError(message)
     self.rand_sv_ratio = ratio
Example #6
0
 def set_SV_types(self, SV_types_string):
     """Parse a comma-separated list of SV types and store it as a list.

     Each item must be one of "inv", "dup", "tra" or "del"; items are
     compared as-is (no whitespace stripping).

     Raises:
         exceptions.WrongArgumentError: on the first item that is not an
             accepted SV type.
     """
     accepted = ("inv", "dup", "tra", "del")
     requested = SV_types_string.split(",")
     for candidate in requested:
         if candidate in accepted:
             continue
         message = (
             "Error: the following SV type <{SV_type}> is incorrect."
         ).format(SV_type=candidate)
         raise exceptions.WrongArgumentError(message)
     self.SV_types = requested
Example #7
0
 def set_sample_attr(self, sample_attr_string):
     """Parse "key:value" pairs separated by commas into a dict of lists.

     Values sharing a key are collected, in input order, in one list. Only
     the first two ":"-separated fields of each pair are used.

     Raises:
         exceptions.WrongArgumentError: if a pair contains no ":".
     """
     grouped = {}
     for pair in sample_attr_string.split(","):
         if ":" not in pair:
             message = "Error: please use the correct format for attribute listing"
             raise exceptions.WrongArgumentError(message)
         fields = pair.split(":")
         attr_name, attr_value = fields[0], fields[1]
         grouped.setdefault(attr_name, []).append(attr_value)
     self.sample_attr = grouped
Example #8
0
 def set_population(self, population):
     """Store the sample's population origin after validating it.

     Accepted values: "asian_pacific_islander", "black", "caucasian",
     "hispanic" and "native_american".

     Raises:
         exceptions.WrongArgumentError: for any other value.
     """
     known_populations = (
         "asian_pacific_islander",
         "black",
         "caucasian",
         "hispanic",
         "native_american",
     )
     # Guard clause: reject unknown populations before touching the instance.
     if population not in known_populations:
         template = ("Error: the following population <{population}> "
                     "is not correct")
         raise exceptions.WrongArgumentError(
             template.format(population=population))
     self.population = population
Example #9
0
def main():
    """
    Parse the command line, configure the HLA pipeline and run it.

    The main function:
    - parses through all the command line arguments
    - creates the hla_pipeline class
    - verifies there are no missing arguments
    - runs the program and analysis

    Exits with status 2 if getopt rejects the command line, and exits after
    printing the description when -h/--help is given.

    Raises:
        exceptions.WrongArgumentError: if positional (non-paired) arguments
            are present or an option has no handler.
        exceptions.MissingArgumentError: if a required attribute was not set
            on the pipeline object.
    """
    try:
        # Every long option whose handler consumes a value needs a trailing
        # "=" so that it mirrors its short counterpart ("a:", "c:", ...).
        # "genome_dir=" is kept for backward compatibility; "dir_genome=" is
        # the spelling the handler and the required-argument check use.
        opts, args = getopt.getopt(
            sys.argv[1:], "g:e:r:G:l:p:o:t:H:s:K:a:P:L:c:h", [
                "input_WGS=", "input_WES=", "input_RNAseq=", "genome_dir=",
                "dir_genome=", "RNAseq_min_length=", "population=",
                "output_dir=", "num_threads=", "dir_HLA_HD=", "dir_seq2HLA=",
                "dir_Kourami=", "dir_arcasHLA=", "dir_picard=", "dir_HLA_LA=",
                "correct_HLA=", "help"
            ])
    except getopt.GetoptError as e:
        print(e)
        sys.exit(2)
    if args:
        message = "Error: non-paired arguments are not allowed."
        raise exceptions.WrongArgumentError(message)

    HLA_pipeline = pipeline.HLAPipeline()
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            description()
            sys.exit()
        elif opt in ("-g", "--input_WGS"):
            HLA_pipeline.set_path_WGS(arg)
        elif opt in ("-e", "--input_WES"):
            HLA_pipeline.set_path_WES(arg)
        elif opt in ("-r", "--input_RNAseq"):
            HLA_pipeline.set_path_RNAseq(arg)
        elif opt in ("-G", "--dir_genome", "--genome_dir"):
            HLA_pipeline.set_dir_genome(arg)
        elif opt in ("-l", "--RNAseq_min_length"):
            HLA_pipeline.set_RNAseq_min_length(arg)
        elif opt in ("-p", "--population"):
            HLA_pipeline.set_population(arg)
        elif opt in ("-o", "--output_dir"):
            HLA_pipeline.set_output_dir(arg)
        elif opt in ("-t", "--num_threads"):
            HLA_pipeline.set_num_threads(arg)
        elif opt in ("-H", "--dir_HLA_HD"):
            HLA_pipeline.set_dir_HLA_HD(arg)
        elif opt in ("-s", "--dir_seq2HLA"):
            HLA_pipeline.set_dir_seq2HLA(arg)
        elif opt in ("-K", "--dir_Kourami"):
            HLA_pipeline.set_dir_Kourami(arg)
        elif opt in ("-a", "--dir_arcasHLA"):
            HLA_pipeline.set_dir_arcasHLA(arg)
        elif opt in ("-P", "--dir_picard"):
            HLA_pipeline.set_dir_picard(arg)
        elif opt in ("-L", "--dir_HLA_LA"):
            HLA_pipeline.set_dir_HLA_LA(arg)
        elif opt in ("-c", "--correct_HLA"):
            HLA_pipeline.set_path_correct_HLA(arg)
        else:
            message = "Error: {opt} is not a valid option".format(opt=opt)
            raise exceptions.WrongArgumentError(message)
    # NOTE(review): this check looks for attributes named input_WGS /
    # input_WES / input_RNAseq, while the run phase below reads path_WGS /
    # path_WES -- confirm against HLAPipeline which names the setters create.
    for pipeline_attr in [
            "input_WGS", "input_WES", "input_RNAseq", "RNAseq_min_length",
            "population", "output_dir", "dir_genome", "dir_HLA_HD",
            "dir_arcasHLA", "dir_seq2HLA", "dir_Kourami", "dir_picard",
            "dir_HLA_LA"
    ]:
        if not hasattr(HLA_pipeline, pipeline_attr):
            message = ("Error: you must indicate --{attr}.").format(
                attr=pipeline_attr)
            raise exceptions.MissingArgumentError(message)

    # Kourami and HLA_LA are run once per DNA input (WGS, then WES).
    for path in (HLA_pipeline.path_WGS, HLA_pipeline.path_WES):
        runprogram.Kourami(HLA_pipeline, path)
        runprogram.HLA_LA(HLA_pipeline, path)
    runprogram.seq2HLA(HLA_pipeline)
    runprogram.HLA_HD(HLA_pipeline)
    runprogram.arcasHLA(HLA_pipeline)
    analysis.extract_results(HLA_pipeline)
    analysis.calculate_accuracy(HLA_pipeline)
def main():
    '''Reads input from terminal and coordinates pipeline.

    Parses the command line with getopt, configures a
    pipeline.MotifPipeline, checks that the required arguments were given,
    loads the parsl configuration named by --config (default "local") from
    the ``configs`` directory next to this file, and then runs the
    extraction, merge, bedtools, FIMO, AME and reporting steps in order.

    Exits with status 2 if getopt rejects the command line, and exits after
    printing the description when -h/--help is given.

    Raises:
        exceptions.WrongArgumentError: if positional (non-paired) arguments
            are present or an option has no handler.
        exceptions.MissingArgumentError: if a required argument is missing,
            or only one of --sampleinfo_table / --sample_attr is given.
        exceptions.IncorrectPathError: if the parsl config cannot be loaded.
    '''
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "i:o:f:l:e:m:a:t:s:r:F:A:c:p:h", [
                "input_dir=", "output_dir=", "genome_fasta=", "genome_len=",
                "genome_include=", "motif_path=", "sample_attr=",
                "sampleinfo_table=", "SV_types=", "rand_sv_ratio=",
                "FIMO_thresh=", "AME_scoring=", "config=", "prefix=", "help"
            ])
    except getopt.GetoptError as e:
        print(e)
        sys.exit(2)

    if args:
        message = "Error: non-paired arguments are not allowed."
        raise exceptions.WrongArgumentError(message)

    motif_pipeline = pipeline.MotifPipeline()
    sample_attr_path = None
    genome_fasta = None
    genome_len = None
    genome_include = None
    prefix = None
    config_name = "local"

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            description()
            sys.exit()
        elif opt in ("-i", "--input_dir"):
            motif_pipeline.set_input_dir(arg)
        elif opt in ("-o", "--output_dir"):
            motif_pipeline.set_output_dir(arg)
        elif opt in ("-f", "--genome_fasta"):
            genome_fasta = arg
        elif opt in ("-l", "--genome_len"):
            genome_len = arg
        elif opt in ("-e", "--genome_include"):
            genome_include = arg
        elif opt in ("-m", "--motif_path"):
            motif_pipeline.set_motif_path(arg)
        elif opt in ("-a", "--sample_attr"):
            motif_pipeline.set_sample_attr(arg)
        elif opt in ("-t", "--sampleinfo_table"):
            sample_attr_path = arg
        elif opt in ("-s", "--SV_types"):
            motif_pipeline.set_SV_types(arg)
        elif opt in ("-r", "--rand_sv_ratio"):
            motif_pipeline.set_rand_sv_ratio(arg)
        elif opt in ("-F", "--FIMO_thresh"):
            motif_pipeline.set_FIMO_thresh(arg)
        elif opt in ("-A", "--AME_scoring"):
            motif_pipeline.set_AME_scoring(arg)
        elif opt in ("-c", "--config"):
            config_name = arg
        elif opt in ("-p", "--prefix"):
            prefix = arg
        else:
            message = "Error: {opt} is not a valid option".format(opt=opt)
            raise exceptions.WrongArgumentError(message)

    # --sampleinfo_table and --sample_attr must be given together or not at
    # all. NOTE(review): this presumes MotifPipeline initialises sample_attr
    # to "all" when --sample_attr is absent -- confirm in the class.
    if ((sample_attr_path is None and not motif_pipeline.sample_attr == "all")
            or (sample_attr_path is not None
                and motif_pipeline.sample_attr == "all")):
        message = "Error: you must indicate both --sampleinfo_table and --sample_attr, or neither."
        raise exceptions.MissingArgumentError(message)
    if genome_fasta is None:
        message = "Error: you must indicate --genome_fasta."
        raise exceptions.MissingArgumentError(message)
    if genome_len is None:
        message = "Error: you must indicate --genome_len."
        raise exceptions.MissingArgumentError(message)
    if genome_include is None:
        message = "Error: you must indicate --genome_include."
        raise exceptions.MissingArgumentError(message)
    for pipeline_attr in ["input_dir", "output_dir", "motif_path"]:
        if not hasattr(motif_pipeline, pipeline_attr):
            message = ("Error: you must indicate --{attr}.").format(
                attr=pipeline_attr)
            raise exceptions.MissingArgumentError(message)
    motif_pipeline.set_subdir_name(prefix)
    motif_pipeline.write_description()
    motif_pipeline.set_list_bedpe(sample_attr_path)
    reference_genome = refgenome.ReferenceGenome(genome_fasta, genome_len,
                                                 genome_include)
    # Directory containing this file; os.path.dirname works with any path
    # separator, unlike splitting on '/'.
    base_dir = os.path.dirname(os.path.abspath(__file__))
    try:
        config = os.path.join(base_dir, 'configs', '{}.py'.format(config_name))
        spec = importlib.util.spec_from_file_location('', config)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        parsl.load(module.config)
    except Exception as err:
        # Keep the original cause attached: a broken config file and a
        # missing one would otherwise be indistinguishable. Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit through.
        raise exceptions.IncorrectPathError(
            "Cannot find the config file <{config_name}>.".format(
                config_name=config_name)) from err

    # output_dir is concatenated as-is, so it is expected to already end
    # with a path separator.
    bed_dir = motif_pipeline.output_dir + "bed_files"
    if not os.path.isdir(bed_dir):
        os.mkdir(bed_dir)
    for file_name in motif_pipeline.list_bedpe:
        sv_types_to_run = get_SV_types(motif_pipeline, file_name)
        if sv_types_to_run:
            extractdata.bedpe_to_bed(reference_genome, motif_pipeline,
                                     file_name, sv_types_to_run)
    # Block until every task submitted to parsl so far has finished.
    parsl.wait_for_current_tasks()
    runprogram.merge(motif_pipeline)
    motif_pipeline.set_num_SV_breakpoints()
    runprogram.bedtools(motif_pipeline, reference_genome)
    runprogram.FIMO(motif_pipeline)
    runprogram.AME(motif_pipeline)
    extractdata.extract_list_sequences_AME(motif_pipeline)
    extractdata.extract_output_FIMO(motif_pipeline)
    extractdata.extract_output_AME(motif_pipeline)
    graphs.generate_histogram(motif_pipeline)