Ejemplo n.º 1
0
def reverse(master_file, organism):
    """Reverse blast a fasta file of multiple sequences against an organism.

    The master file is split with ``split_fasta``; each resulting sequence
    is blasted with ``query`` (using ``entrez_query=organism``). Sequences
    whose blast output contains the organism, as reported by
    ``organism_in_blast``, are concatenated into
    ``<master_file minus extension>_reversed.fasta``. The ``.fasta`` and
    ``_blast.txt`` files of the remaining sequences are deleted.

    Arguments:
    ---------
    master_file : str
        filename for fasta containing the sequences to reverse blast.
    organism :
        value forwarded to ``query`` as ``entrez_query`` and looked up in
        each blast result.
    """
    # Output name is built from the master file name without its extension.
    filename = os.path.splitext(master_file)[0]
    fastas = split_fasta(master_file)
    print("Total number of sequences before blasting: " + str(len(fastas)))

    # Run all individual blasts before inspecting any result.
    # NOTE(review): assumes query() has written its output file by the time
    # it returns -- confirm against its implementation.
    for f in fastas:
        query(f + ".fasta", f + "_blast.txt", entrez_query=organism)

    # Sort sequences by whether the organism shows up in their blast output.
    good_fastas = []
    bad_fastas = []
    for f in fastas:
        if organism_in_blast(organism, f + "_blast.txt"):
            good_fastas.append(f)
        else:
            bad_fastas.append(f)

    # Context managers guarantee both handles are closed even if a read or
    # write raises part-way through.
    with open(filename + "_reversed.fasta", "w") as merged:
        for fasta in good_fastas:
            with open(fasta + ".fasta", "r") as single:
                merged.write(single.read())
    print("Final number of sequences after blasting: " + str(len(good_fastas)))

    # Clean up the files belonging to sequences that did not match.
    for f in bad_fastas:
        os.remove(f + ".fasta")
        os.remove(f + "_blast.txt")
    print("Done!")
Ejemplo n.º 2
0
def seeds(fasta, as_homologset=True, rm_blast=False, **kwargs):
    """ Blast a set of seed sequences.

        Arguments:
        ---------
        fasta : str
            filename for fasta containing seed sequences.
        as_homologset: bool [default=true]
            Convert blast results to homolog set and return it.
        rm_blast: bool [default=false]
            Not used by this function at present; kept so existing callers
            keep working.

        kwargs are forwarded as keyword arguments to the blasting method
        (``query``).

        Returns:
        -------
        The result of ``to_homologset`` over the blast output files when
        ``as_homologset`` is true, otherwise None.
    """
    fastas = split_fasta(fasta)
    print("Total number of sequences before blasting: " + str(len(fastas)))

    # Make a directory for storing the blast results. exist_ok lets the
    # function be re-run from the same working directory without crashing.
    blastpath = os.path.join(os.getcwd(), "blast")
    os.makedirs(blastpath, exist_ok=True)

    # Run individual blasts
    outnames = []
    for f in fastas:
        # Make filenames
        iname = f + ".fasta"
        oname = os.path.join(blastpath, f + "_blast.txt")

        # Send query to NCBI. The options must be unpacked: passing the
        # dict itself would hand query() a stray third positional argument
        # instead of the caller's keyword options.
        query(iname, oname, **kwargs)
        outnames.append(oname)

    # If homolog_set should be made, return homolog_set
    if as_homologset:
        return to_homologset(outnames, tag_list=DEFAULTS)
    return None
Ejemplo n.º 3
0
def seeds(fasta, as_homologset=True, rm_blast=False, **kwargs):
    """ Blast a set of seed sequences.

        Arguments:
        ---------
        fasta : str
            filename for fasta containing seed sequences.
        as_homologset: bool [default=true]
            Convert blast results to homolog set and return it.
        rm_blast: bool [default=false]
            Not used by this function at present; kept so existing callers
            keep working.

        kwargs are forwarded as keyword arguments to the blasting method
        (``query``).

        Returns:
        -------
        The result of ``to_homologset`` over the blast output files when
        ``as_homologset`` is true, otherwise None.
    """
    fastas = split_fasta(fasta)
    print("Total number of sequences before blasting: " + str(len(fastas)))

    # Make a directory for storing the blast results. exist_ok lets the
    # function be re-run from the same working directory without crashing.
    blastpath = os.path.join(os.getcwd(), "blast")
    os.makedirs(blastpath, exist_ok=True)

    # Run individual blasts
    outnames = []
    for f in fastas:
        # Make filenames
        iname = f + ".fasta"
        oname = os.path.join(blastpath, f + "_blast.txt")

        # Send query to NCBI. The options must be unpacked: passing the
        # dict itself would hand query() a stray third positional argument
        # instead of the caller's keyword options.
        query(iname, oname, **kwargs)
        outnames.append(oname)

    # If homolog_set should be made, return homolog_set
    if as_homologset:
        return to_homologset(outnames, tag_list=DEFAULTS)
    return None
Ejemplo n.º 4
0
def reverse(master_file, organism):
    """Reverse blast a fasta file of multiple sequences against an organism.

    The master file is split with ``split_fasta``; each resulting sequence
    is blasted with ``query`` (using ``entrez_query=organism``). Sequences
    whose blast output contains the organism, as reported by
    ``organism_in_blast``, are concatenated into
    ``<master_file minus extension>_reversed.fasta``. The ``.fasta`` and
    ``_blast.txt`` files of the remaining sequences are deleted.

    Arguments:
    ---------
    master_file : str
        filename for fasta containing the sequences to reverse blast.
    organism :
        value forwarded to ``query`` as ``entrez_query`` and looked up in
        each blast result.
    """
    # Output name is built from the master file name without its extension.
    filename = os.path.splitext(master_file)[0]
    fastas = split_fasta(master_file)
    print("Total number of sequences before blasting: " + str(len(fastas)))

    # Run all individual blasts before inspecting any result.
    # NOTE(review): assumes query() has written its output file by the time
    # it returns -- confirm against its implementation.
    for f in fastas:
        query(f + ".fasta", f + "_blast.txt", entrez_query=organism)

    # Sort sequences by whether the organism shows up in their blast output.
    good_fastas = []
    bad_fastas = []
    for f in fastas:
        if organism_in_blast(organism, f + "_blast.txt"):
            good_fastas.append(f)
        else:
            bad_fastas.append(f)

    # Context managers guarantee both handles are closed even if a read or
    # write raises part-way through.
    with open(filename + "_reversed.fasta", "w") as merged:
        for fasta in good_fastas:
            with open(fasta + ".fasta", "r") as single:
                merged.write(single.read())
    print("Final number of sequences after blasting: " + str(len(good_fastas)))

    # Clean up the files belonging to sequences that did not match.
    for f in bad_fastas:
        os.remove(f + ".fasta")
        os.remove(f + "_blast.txt")
    print("Done!")