Exemple #1
0
def run_shorts(folder):
    """
    Relocate the known problematic genes found in ``folder`` and run codeml on them.

    Every entry of ``folder`` whose name equals one of the five hard-coded gene
    names is handled as follows: its fasta is moved from
    ``Candida/Genes/NoRef/<gene>/`` into a newly created
    ``/sternadi/home/volume2/ella/<gene>/`` directory, the moved fasta is
    converted to phylip, the M8a and M8 control files are written next to it
    and ``candida_codeml_runner`` is called. A summary count is printed at the end.

    :param folder: directory whose entries are matched against the gene names
    :return: None
    """
    home = "/sternadi/home/volume2/ella/"
    tree = "/sternadi/home/volume2/ella/Candida/Trees/SNP_seq_without_ref.phy_phyml_tree.txt"
    problematic = [
        "Ca21chr1-orf19.4923.1",
        "Ca21chr3-orf19.6828.1",
        "Ca21chr5-orf19.961.2",
        "Ca21chrR-orf19.1738.1",
        "Ca21chrR-orf19.4380.1",
    ]
    # (control-file suffix, results-file suffix, last argument of the ctl writer)
    models = (
        ("-M8a.CLT", "-M8a-Results.txt", 1),
        ("-M8.CLT", "-M8-Results.txt", 0),
    )

    handled = 0
    for gene in os.listdir(folder):
        if gene not in problematic:
            continue

        target_dir = home + gene
        # Common prefix of every file generated for this gene.
        stem = target_dir + "/" + gene
        moved_fasta = stem + ".fasta"
        print(moved_fasta)

        # The destination directory must not exist yet; os.makedirs raises otherwise.
        os.makedirs(target_dir)
        original_fasta = ("/sternadi/home/volume2/ella/Candida/Genes/NoRef/"
                          + gene + "/" + gene + ".fasta")
        os.rename(original_fasta, moved_fasta)

        seqFileTools.convert_fasta_to_phylip(moved_fasta, outfile=None)

        phylip_file = stem + ".phy"
        control_files = []
        for ctl_suffix, results_suffix, flag in models:
            ctl_path = stem + ctl_suffix
            PAML_utilities.candida_write_ctl_codeml_file(
                ctl_path, phylip_file, tree, stem + results_suffix, flag)
            control_files.append(ctl_path)

        m8a_ctl, m8_ctl = control_files
        candida_codeml_runner(m8a_ctl, m8_ctl, gene)

        handled += 1

    print("created shorts for: " + str(handled))
Exemple #2
0
def create_files_for_genes(folder):
    """
    Build a per-gene working folder and launch codeml for every ``.fasta`` entry of ``folder``.

    For each ``<gene>.fasta`` name listed in ``folder`` the function:

    1. opens the file of that name in the hard-coded Candida ``Genes`` directory
       (NOTE: the content is read from that fixed directory, not from ``folder``)
       and skips its first two lines (the reference gene),
    2. creates ``Genes/NoRef/<gene>`` and writes a fasta there in which every
       header gets an ``A`` right after ``>`` and every sequence line is
       upper-cased with its last three characters (the stop codon) removed,
    3. converts that fasta to phylip,
    4. writes the M8a and M8 codeml control files,
    5. calls ``candida_codeml_runner`` with both control files and the gene name.

    Assumes each sequence occupies a single line; a sequence wrapped over several
    lines would lose three characters per line -- TODO confirm input format.

    :param folder: directory whose listing decides which genes are processed
    :return: None
    :raises OSError: if the source fasta cannot be opened or the gene folder
        already exists (``os.makedirs`` is called without ``exist_ok``)
    """
    genes_dir = "/sternadi/home/volume2/ella/Candida/Genes/"
    noref_dir = "/sternadi/home/volume2/ella/Candida/Genes/NoRef/"
    tree_file = "/sternadi/home/volume2/ella/Candida/Trees/SNP_seq_without_ref.phy_phyml_tree.txt"

    for filename in os.listdir(folder):
        if not filename.endswith(".fasta"):
            continue

        gene_name = filename[:-6]
        # Separate local instead of overwriting the ``folder`` parameter.
        gene_folder = noref_dir + gene_name
        fasta_path = gene_folder + "/" + filename

        # Context managers guarantee both handles are closed even if a write fails.
        with open(genes_dir + filename, "r") as source:
            # skip the reference gene
            source.readline()
            source.readline()
            os.makedirs(gene_folder)
            with open(fasta_path, "w") as output:
                # write all sequences with A in the beginning of the name,
                # and without the stop codon at the end
                for line in source:
                    if ">" in line:
                        output.write(line.replace(">", ">A"))
                    else:
                        # Strip the line ending first so exactly three bases are
                        # dropped even when the last line has no trailing newline
                        # or the file uses CRLF line endings.
                        sequence = line.rstrip("\r\n")[:-3]
                        output.write(sequence.upper() + "\n")

        # create phylip
        seqFileTools.convert_fasta_to_phylip(fasta_path, outfile=None)

        # create CLT
        phylip_path = gene_folder + "/" + filename.replace(".fasta", ".phy")
        M8a_CTL = gene_folder + "/" + filename.replace(".fasta", "-M8a.CLT")
        M8_CTL = gene_folder + "/" + filename.replace(".fasta", "-M8.CLT")
        PAML_utilities.candida_write_ctl_codeml_file(
            M8a_CTL,
            phylip_path,
            tree_file,
            gene_folder + "/" + filename.replace(".fasta", "-M8a-Results.txt"),
            1)
        PAML_utilities.candida_write_ctl_codeml_file(
            M8_CTL,
            phylip_path,
            tree_file,
            gene_folder + "/" + filename.replace(".fasta", "-M8-Results.txt"),
            0)

        # run codeml
        candida_codeml_runner(M8a_CTL, M8_CTL, gene_name)
Exemple #3
0
def main():
    """
    Command-line entry point: convert a fasta sequence file to a phylip file.

    Options:
        -i / --input    fasta sequence file
        -o / --output   phylip output file (optional, defaults to None)

    The input path, and the output path when one is given, are passed through
    ``check_filename`` and the returned values are the ones handed to
    ``convert_fasta_to_phylip``. The value returned by the conversion is
    reported on stdout.

    :return: None
    """
    parser = OptionParser("usage: %prog [options]")
    parser.add_option("-i",
                      "--input",
                      dest="input",
                      help="fasta sequence file")
    parser.add_option("-o",
                      "--output",
                      dest="output",
                      help="phylip output file",
                      default=None)

    options, _ = parser.parse_args()
    # Local names avoid shadowing the ``input`` builtin.
    input_path = check_filename(options.input)
    output_path = options.output
    if output_path is not None:
        # The result used to be stored in a misspelled variable ("ouput"), so the
        # checked path was silently discarded and the raw option value was used.
        # Truefile=False: presumably the output file need not exist yet --
        # confirm against check_filename.
        output_path = check_filename(output_path, Truefile=False)

    output_path = convert_fasta_to_phylip(input_path, output_path)
    print("converted %s fasta file into %s" % (input_path, output_path))
def phyml_aa_runner(alignment, alias="phyml", phylip=True):
    """
    Submit a PhyML amino-acid run for an alignment as a cluster job.
    :param alignment: path of the alignment file
    :param alias: name given to the job (default: phyml)
    :param phylip: True when the alignment is a phylip file; False when it is
                   a fasta file, which is converted to phylip first
    :return: id of the submitted job
    """
    phylip_path = check_filename(alignment)
    # Kept as an equality test (not truthiness) so that exactly the same
    # values as before trigger the fasta -> phylip conversion.
    if phylip == False:
        phylip_path = convert_fasta_to_phylip(phylip_path)

    command_file = pbs_jobs.get_cmdfile_dir("phyml", alias)
    phyml_command = "/sternadi/home/volume1/shared/tools/PhyML/PhyML_3.0_linux64 -i %s -d aa -q -b 0" % phylip_path
    pbs_jobs.create_pbs_cmd(
        cmdfile=command_file,
        alias=alias,
        jnum=1,
        gmem=2,
        cmds=phyml_command,
    )
    return pbs_jobs.submit(command_file)
Exemple #5
0
def rerun_middle_stops(folder):
    """
    Rebuild the gene folder of every gene listed in ``run_again`` and run PAML on it again.

    This function runs over the original gene folder and, for every entry whose
    name without its last six characters is in ``run_again``, it:

    1. deletes the gene's ``Genes/NoRef/<gene>`` folder and recreates it empty,
    2. reads the gene file from the hard-coded Candida ``Genes`` directory,
       skipping the first two lines (the reference gene),
    3. writes a new fasta in which headers get an ``A`` right after ``>`` and
       each sequence line is upper-cased, stripped of its final three
       characters (the stop codon) and passed through ``return_gapped_seq``
       (which, per the original description, replaces stop codons in the
       middle with gaps),
    4. skips the gene if ``all_the_same`` reports that the rewritten sequences
       do not differ,
    5. otherwise converts the fasta to phylip, writes the M8a and M8 control
       files and calls ``run_PAML.candida_codeml_runner``.

    :param folder: The general Genes folder
    :return: number of files rewritten; genes skipped in step 4 are included
        because their fasta has already been regenerated at that point
    :raises OSError: if the gene folder to delete does not exist or the source
        fasta cannot be opened
    """
    genes_dir = "/sternadi/home/volume2/ella/Candida/Genes/"
    noref_dir = "/sternadi/home/volume2/ella/Candida/Genes/NoRef/"
    tree_file = "/sternadi/home/volume2/ella/Candida/Trees/SNP_seq_without_ref.phy_phyml_tree.txt"
    run_again = [
        "Ca21chr2-orf19.813", "Ca21chr6-RBF1", "Ca21chr4-PGA31",
        "Ca21chr5-orf19.1935", "Ca21chr4-GST1", "Ca21chr2-orf19.894",
        "Ca21chr7-orf19.5139", "Ca21chr1-CHS3", "Ca21chr4-orf19.2680",
        "Ca21chr3-orf19.6008", "Ca21chr1-orf19.4984", "Ca21chr4-RAM1",
        "Ca21chr2-orf19.1768", "Ca21chr5-orf19.4337", "Ca21chrR-orf19.1737",
        "Ca21chrR-orf19.6382", "Ca21chr1-orf19.7278", "Ca21chr1-orf19.6209",
        "Ca21chr3-FGR23", "Ca21chr4-ZCF27", "Ca21chr4-ERG26",
        "Ca21chr5-orf19.937"
    ]

    cnt = 0
    for filename in os.listdir(folder):
        gene_name = filename[:-6]
        if gene_name not in run_again:
            continue

        cnt += 1
        seqs = []
        gene_folder = noref_dir + gene_name
        fasta_path = gene_folder + "/" + filename

        # Start from a clean folder; raises if the folder is missing.
        shutil.rmtree(gene_folder)

        # Context managers guarantee both handles are closed even if a write fails.
        with open(genes_dir + filename, "r") as source:
            # skip the reference gene
            source.readline()
            source.readline()
            os.makedirs(gene_folder)
            with open(fasta_path, "w") as output:
                # write all sequences with A in the beginning of the name, and
                # without the stop codon at the end; stop codons in the middle
                # are handled by return_gapped_seq.
                for line in source:
                    if ">" in line:
                        output.write(line.replace(">", ">A"))
                    else:
                        # Strip the line ending first so exactly three bases are
                        # dropped even when the last line has no trailing newline
                        # or the file uses CRLF line endings.
                        trimmed = line.rstrip("\r\n")[:-3].upper()
                        new_line = return_gapped_seq(trimmed)
                        output.write(new_line + "\n")
                        seqs.append(new_line)

        # confirm all the sequences are not the same:
        if all_the_same(seqs):
            print("all the same in " + filename)
            continue

        # create phylip
        seqFileTools.convert_fasta_to_phylip(fasta_path, outfile=None)

        # create CLT
        phylip_path = gene_folder + "/" + filename.replace(".fasta", ".phy")
        M8a_CTL = gene_folder + "/" + filename.replace(".fasta", "-M8a.CLT")
        M8_CTL = gene_folder + "/" + filename.replace(".fasta", "-M8.CLT")
        PAML_utilities.candida_write_ctl_codeml_file(
            M8a_CTL,
            phylip_path,
            tree_file,
            gene_folder + "/" + filename.replace(".fasta", "-M8a-Results.txt"),
            1)
        PAML_utilities.candida_write_ctl_codeml_file(
            M8_CTL,
            phylip_path,
            tree_file,
            gene_folder + "/" + filename.replace(".fasta", "-M8-Results.txt"),
            0)

        # run codeml
        run_PAML.candida_codeml_runner(M8a_CTL, M8_CTL, gene_name)

    print(str(cnt) + " files were ran again")
    # The docstring always promised this count; it used to be printed only.
    return cnt