Example #1
0
def blastall_file2db(fname,dbname="",blastprogram="blastp",output="ncbiparsed",extra_blastp_params={'F': 'F', 'e': '10'}):
    """
    Run blastall with an existing query file against a database.

    @type  fname: string
    @param fname: query file, passed to blastall as `-i`; it is NOT removed

    @type  dbname: string
    @param dbname: database name, passed to blastall as `-d`

    @type  blastprogram: string
    @param blastprogram: one of 'blastp', 'tblastn', 'blastn', 'tblastx'

    @type  output: string
    @param output: "ncbiparsed" returns the result of
                   NCBIStandalone.BlastParser().parse(); any other value
                   returns the raw text read from blastall's STDOUT

    @type  extra_blastp_params: dict
    @param extra_blastp_params: extra options; each key/value pair is added
                                to the command line as `-key value`

    @attention: requires global variables BLASTALL_PATH, osPopen3 and
                NCBIStandalone

    @rtype:  parsed blast record or string, depending on `output`
    @return: the blastall output

    @raise ValueError: `blastprogram` is not one of the supported programs
    @raise RuntimeError: running blastall or parsing its output failed
    """
    supported = ('blastp', 'tblastn', 'blastn', 'tblastx')
    if blastprogram not in supported:
        raise ValueError("unsupported blastprogram '%s'; choose one of: %s" % (
            blastprogram, ", ".join(supported)))

    # the default dictionary is only read, never modified, so sharing it
    # between calls is harmless
    extra_params = " ".join(["-%s %s" % (k, v) for k, v in extra_blastp_params.items()])
    command = "%s -p %s %s -i %s -d %s " % (BLASTALL_PATH, blastprogram, extra_params, fname, dbname)

    # pre-bind the pipes so the error handler and the cleanup are safe even
    # when osPopen3 itself fails
    co = ce = None
    try:
        ci, co, ce = osPopen3(command)
        ci.close()
        if output == "ncbiparsed":
            blastallout = NCBIStandalone.BlastParser().parse(co)
        else:
            blastallout = co.read()
        # do NOT remove the input fname
    except Exception:
        error = ""
        if ce is not None:
            try:
                error = ce.read().strip()
            except (IOError, OSError, ValueError):
                error = "(STDERR could not be read)"
        print(command)
        print("ERROR: '%s'" % error)
        raise RuntimeError("BLAST CRASHED....")
    finally:
        for stream in (co, ce):
            if stream is not None:
                stream.close()
    # and return!
    return blastallout
Example #2
0
def run_cexpander_dr(settingsfile, outfname, verbose=False):
    """
    Run the cexpander_dr program with a settings file on STDIN and its
    STDOUT redirected to an output file.

    @type  settingsfile: string
    @param settingsfile: path to the file that is fed to the program (`<`)

    @type  outfname: string
    @param outfname: path to the file that receives the program's STDOUT (`>`)

    @type  verbose: Boolean
    @param verbose: print the command line to STDOUT (True) or not (False, default)

    @attention: requires global variable EXECUTABLE_CEXPANDER_CEXPANDER
    @attention: file names are inserted into the command line unquoted

    @rtype:  Boolean
    @return: True (always; the program's exit status is not inspected)
    """
    command = "%s < %s > %s " % (EXECUTABLE_CEXPANDER_CEXPANDER, settingsfile,
                                 outfname)
    if verbose:
        print(command)
    ci, co, ce = osPopen3(command)
    ci.close()
    try:
        # Read both pipes to EOF before closing them. Assuming osPopen3
        # behaves like os.popen3, closing a pipe does not wait for the child,
        # so without this the function could return before outfname is
        # completely written. STDERR is read first because STDOUT is
        # redirected to the file and should carry no data.
        ce.read()
        co.read()
    finally:
        co.close()
        ce.close()
    return True
Example #3
0
def run_cexpander_dr(settingsfile,outfname,verbose=False):
    """
    Run the cexpander_dr program with a settings file on STDIN and its
    STDOUT redirected to an output file.

    @type  settingsfile: string
    @param settingsfile: path to the file that is fed to the program (`<`)

    @type  outfname: string
    @param outfname: path to the file that receives the program's STDOUT (`>`)

    @type  verbose: Boolean
    @param verbose: print the command line to STDOUT (True) or not (False, default)

    @attention: requires global variable EXECUTABLE_CEXPANDER_CEXPANDER
    @attention: file names are inserted into the command line unquoted

    @rtype:  Boolean
    @return: True (always; the program's exit status is not inspected)
    """
    command = "%s < %s > %s " % (
        EXECUTABLE_CEXPANDER_CEXPANDER,
        settingsfile,
        outfname
        )
    if verbose:
        print(command)
    ci,co,ce = osPopen3(command)
    ci.close()
    try:
        # Read both pipes to EOF before closing them. Assuming osPopen3
        # behaves like os.popen3, closing a pipe does not wait for the child,
        # so without this the function could return before outfname is
        # completely written. STDERR is read first because STDOUT is
        # redirected to the file and should carry no data.
        ce.read()
        co.read()
    finally:
        co.close()
        ce.close()
    return True
Example #4
0
def blastall_seq2seq(fastadata=(),filenames=(),output="ncbiparsed",blastprogram="blastp",remove_files=True,extra_blastp_params={'F': 'F', 'e': '10'}):
    """
    Blast one query sequence against one subject sequence.

    choose proper input (exactly one of the two):
    fastadata   ( ( headerQUERY, seqQUERY ) , ( headerSBJCT, seqSBJCT ) )
     or
    filenames   ( filenameQUERY, filenameSBJCT )

    @type  fastadata: tuple
    @param fastadata: 2 (header, sequence) pairs; written to fasta files
                      in the current working directory

    @type  filenames: tuple
    @param filenames: 2 existing file names (query, subject)

    @type  output: string
    @param output: "ncbiparsed" returns the result of
                   NCBIStandalone.BlastParser().parse(); any other value
                   returns the raw text read from blastall's STDOUT

    @type  blastprogram: string
    @param blastprogram: one of 'blastp', 'tblastn', 'tblastx', 'blastx'

    @type  remove_files: Boolean
    @param remove_files: after a successful run, delete the query file, the
                         subject file and every `<subject file>.*` file

    @type  extra_blastp_params: dict
    @param extra_blastp_params: extra options; each key/value pair is added
                                to the blastall command line as `-key value`

    @attention: with remove_files=True, files passed in via `filenames`
                are deleted as well
    @attention: requires global variables FORMATDB_PATH, BLASTALL_PATH,
                OSsystem, osPopen3, osRemove, NCBIStandalone and
                get_random_string_tag

    @rtype:  parsed blast record or string, depending on `output`
    @return: the blastall output

    @raise ValueError: unsupported `blastprogram`, no input, or improper input
    """
    supported = ('blastp', 'tblastn', 'tblastx', 'blastx')
    if blastprogram not in supported:
        raise ValueError("unsupported blastprogram '%s'; choose one of: %s" % (
            blastprogram, ", ".join(supported)))
    # value for formatdb's -p flag: "F" for tblastn / tblastx, "T" otherwise
    if blastprogram in ('tblastn', 'tblastx'):
        dna_or_prot = "F"
    else:
        dna_or_prot = "T"

    # exactly one of fastadata / filenames must be given, as a 2-tuple
    if not fastadata and not filenames:
        raise ValueError("no input!")
    if fastadata and filenames:
        raise ValueError("inproper input!")
    given = fastadata or filenames
    if not isinstance(given, tuple) or len(given) != 2:
        raise ValueError("inproper input!")

    if fastadata:
        # input is fasta headers and sequences: write them to files
        uniquetag = get_random_string_tag()
        fname_q = "_".join([uniquetag, str(fastadata[0][0]), 'Q.fa'])
        fname_s = "_".join([uniquetag, str(fastadata[1][0]), 'S.fa'])
        for path, record in ((fname_q, fastadata[0]), (fname_s, fastadata[1])):
            fh = open(path, 'w')
            try:
                fh.write(">%s\n%s" % (record[0], record[1]))
            finally:
                fh.close()
    else:
        # input is (supposed to be) filenames
        fname_q = filenames[0]
        fname_s = filenames[1]

    # formatdb
    OSsystem("%s -i %s -p %s" % (FORMATDB_PATH, fname_s, dna_or_prot))
    # and blastall!
    # the default dictionary is only read, never modified
    extra_params = " ".join(["-%s %s" % (k, v) for k, v in extra_blastp_params.items()])
    ci, co, ce = osPopen3("%s -p %s %s -i %s -d %s " % (BLASTALL_PATH, blastprogram, extra_params, fname_q, fname_s))
    try:
        ci.close()
        if output == "ncbiparsed":
            blastallout = NCBIStandalone.BlastParser().parse(co)
        else:
            blastallout = co.read()
    finally:
        # release the pipes even when reading / parsing fails
        co.close()
        ce.close()
    if remove_files:
        OSsystem("rm %s.*" % fname_s)
        osRemove("%s" % fname_s)
        osRemove("%s" % fname_q)
    # and return!
    return blastallout
Example #5
0
def blastall_seq2db(header,
                    sequence,
                    dbname="",
                    blastprogram="blastp",
                    output="ncbiparsed",
                    extra_blastp_params={
                        'F': 'F',
                        'e': '10'
                    }):
    """
    Blast a single sequence against a database.

    The sequence is written to a temporary fasta file in the current working
    directory, used as blastall query (`-i`) and removed afterwards.

    @type  header: string
    @param header: fasta header of the query; also used in the file name

    @type  sequence: string
    @param sequence: query sequence; its first 10 characters are used in
                     the temporary file name

    @type  dbname: string
    @param dbname: database name, passed to blastall as `-d`

    @type  blastprogram: string
    @param blastprogram: one of 'blastp', 'tblastn', 'blastn', 'blastx'

    @type  output: string
    @param output: "ncbiparsed" returns the result of
                   NCBIStandalone.BlastParser().parse(); any other value
                   returns the raw text read from blastall's STDOUT

    @type  extra_blastp_params: dict
    @param extra_blastp_params: extra options; each key/value pair is added
                                to the command line as `-key value`

    @attention: requires global variables BLASTALL_PATH, osPopen3, osRemove,
                osPathJoin, OSgetcwd, NCBIStandalone and get_random_string_tag

    @rtype:  parsed blast record, string or False
    @return: the blastall output, or False when running blastall or parsing
             its output failed

    @raise ValueError: `blastprogram` is not one of the supported programs
    """
    supported = ('blastp', 'tblastn', 'blastn', 'blastx')
    if blastprogram not in supported:
        raise ValueError("unsupported blastprogram '%s'; choose one of: %s" % (
            blastprogram, ", ".join(supported)))

    # the default dictionary is only read, never modified
    extra_params = " ".join(
        ["-%s %s" % (k, v) for k, v in extra_blastp_params.items()])
    # generate (semi ;-) unique filename
    uniquetag = get_random_string_tag()
    fname = "_".join(
        [uniquetag,
         str(header).replace(" ", "_"), sequence[0:10] + ".fa"])
    fname = osPathJoin(OSgetcwd(), fname)
    fh = open(fname, 'w')
    try:
        fh.write(">%s\n%s\n" % (header, sequence))
    finally:
        fh.close()
    command = "%s -p %s %s -i %s -d %s " % (BLASTALL_PATH, blastprogram,
                                            extra_params, fname, dbname)
    # pre-bind the pipes so the cleanup is safe when osPopen3 itself fails
    co = ce = None
    try:
        ci, co, ce = osPopen3(command)
        ci.close()
        if output == "ncbiparsed":
            blastallout = NCBIStandalone.BlastParser().parse(co)
        else:
            blastallout = co.read()
    except Exception:
        # for some kind of - obvious or freak accident case -
        # Blast or parsing of the blast record failed
        # No debugging here; just cleanup and return False
        # (KeyboardInterrupt / SystemExit are deliberately not swallowed)
        print("BLAST CRASHED::")
        print(command)
        blastallout = False
    finally:
        for stream in (co, ce):
            if stream is not None:
                stream.close()
        # remove the created Query file
        osRemove(fname)
    # and return!
    return blastallout
Example #6
0
def getorf(sequence=None,fname=None,outputfile=None,
    executable_getorf=EXECUTABLE_GETORF,
    minsize=EXECUTABLE_GETORF_MINSIZE):
    """
    Run EMBOSS getorf on a sequence or a file and return its results

    Exactly one of `sequence` and `fname` must be given; the input is piped
    into getorf (`echo <sequence> |` or `cat <fname> |`).

    @type  sequence: string
    @param sequence: input for getorf, echoed onto its STDIN

    @type  fname: string
    @param fname: path to getorf input file, cat-ed onto its STDIN

    @type  outputfile: string
    @param outputfile: path to getorf output file (passed as `-outseq`);
                       when omitted getorf is run with `-filter` and its
                       STDOUT is returned instead

    @type  executable_getorf: string
    @param executable_getorf: full path to getorf executable

    @type  minsize: integer or string
    @param minsize: value passed to getorf as `-minsize`

    @attention: `sequence`, `fname` and `outputfile` are inserted into the
                command line unquoted

    @rtype:  string
    @return: getorf's STDOUT when no `outputfile` is given, else `outputfile`

    @raise InproperlyAppliedArgument: neither or both of `sequence` and
                                      `fname` given, or no executable
    """
    # do some integrity checks
    if not sequence and not fname:
        message = "specify `sequence` or `fname` variable, not neither"
        raise InproperlyAppliedArgument(message)
    if sequence and fname:
        message = "specify `sequence` or `fname` variable, not both"
        raise InproperlyAppliedArgument(message)
    if not executable_getorf:
        message = "specify `EXECUTABLE_GETORF` variable"
        raise InproperlyAppliedArgument(message)

    # create command line, execute with popen and parse
    command = "%s -minsize %s -noreverse" % (
                executable_getorf,
                minsize
                )
    if fname:
        command = "cat %s | %s" % (fname, command)
    else:
        command = "echo %s | %s" % (sequence, command)
    if outputfile:
        command = "%s -outseq %s" % (command, outputfile)
    else:
        command = "%s -filter" % (command)
    ci,co,ce = osPopen3(command)
    ci.close()
    output = co.read()
    co.close()
    error = ce.read()
    ce.close()
    # this exact line on STDERR is not treated as an error
    if error and error.strip() != "Finds and extracts open reading frames (ORFs)":
        print("getorf ERROR: %s" % error)
    if not outputfile:
        return output
    return outputfile
Example #7
0
def run_getorf(inputfile="",outputfile="",
    executable_getorf=EXECUTABLE_GETORF,
    minsize=EXECUTABLE_GETORF_MINSIZE):
    """
    Run EMBOSS getorf on an input file and let it write an output file

    @type  inputfile: string
    @param inputfile: full path to getorf input file (passed as `-sequence`)

    @type  outputfile: string
    @param outputfile: full path to getorf output file (passed as `-outseq`)

    @type  executable_getorf: string
    @param executable_getorf: full path to getorf executable

    @type  minsize: integer or string
    @param minsize: value passed to getorf as `-minsize`

    @rtype:  None
    @return: nothing; anything getorf writes to STDOUT / STDERR is printed

    @raise InproperlyAppliedArgument: a required argument is empty
    """
    # all three arguments are mandatory; complain about the first empty one
    mandatory = (
        (inputfile, "specify `inputfile` variable"),
        (outputfile, "specify `outputfile` variable"),
        (executable_getorf, "specify `executable_getorf` variable"),
    )
    for value, complaint in mandatory:
        if not value:
            raise InproperlyAppliedArgument(complaint)

    # assemble the command line and run it
    arguments = (executable_getorf, inputfile, outputfile, minsize)
    command = "%s -sequence %s -outseq %s -minsize %s -noreverse" % arguments
    pipe_in, pipe_out, pipe_err = osPopen3(command)
    pipe_in.close()
    output = pipe_out.read()
    pipe_out.close()
    error = pipe_err.read()
    pipe_err.close()

    # report whatever getorf had to say
    if output:
        print("getorf OUTPUT: %s" % output)
    # this exact line on STDERR is not treated as an error
    banner = "Finds and extracts open reading frames (ORFs)"
    if error and error.strip() != banner:
        print("getorf ERROR: %s" % error)
Example #8
0
def blastall_file2db(fname,
                     dbname="",
                     blastprogram="blastp",
                     output="ncbiparsed",
                     extra_blastp_params={
                         'F': 'F',
                         'e': '10'
                     }):
    """
    Run blastall with an existing query file against a database.

    @type  fname: string
    @param fname: query file, passed to blastall as `-i`; it is NOT removed

    @type  dbname: string
    @param dbname: database name, passed to blastall as `-d`

    @type  blastprogram: string
    @param blastprogram: one of 'blastp', 'tblastn', 'blastn', 'tblastx'

    @type  output: string
    @param output: "ncbiparsed" returns the result of
                   NCBIStandalone.BlastParser().parse(); any other value
                   returns the raw text read from blastall's STDOUT

    @type  extra_blastp_params: dict
    @param extra_blastp_params: extra options; each key/value pair is added
                                to the command line as `-key value`

    @attention: requires global variables BLASTALL_PATH, osPopen3 and
                NCBIStandalone

    @rtype:  parsed blast record or string, depending on `output`
    @return: the blastall output

    @raise ValueError: `blastprogram` is not one of the supported programs
    @raise RuntimeError: running blastall or parsing its output failed
    """
    supported = ('blastp', 'tblastn', 'blastn', 'tblastx')
    if blastprogram not in supported:
        raise ValueError("unsupported blastprogram '%s'; choose one of: %s" % (
            blastprogram, ", ".join(supported)))

    # the default dictionary is only read, never modified, so sharing it
    # between calls is harmless
    extra_params = " ".join(
        ["-%s %s" % (k, v) for k, v in extra_blastp_params.items()])
    command = "%s -p %s %s -i %s -d %s " % (BLASTALL_PATH, blastprogram,
                                            extra_params, fname, dbname)

    # pre-bind the pipes so the error handler and the cleanup are safe even
    # when osPopen3 itself fails
    co = ce = None
    try:
        ci, co, ce = osPopen3(command)
        ci.close()
        if output == "ncbiparsed":
            blastallout = NCBIStandalone.BlastParser().parse(co)
        else:
            blastallout = co.read()
        # do NOT remove the input fname
    except Exception:
        error = ""
        if ce is not None:
            try:
                error = ce.read().strip()
            except (IOError, OSError, ValueError):
                error = "(STDERR could not be read)"
        print(command)
        print("ERROR: '%s'" % error)
        raise RuntimeError("BLAST CRASHED....")
    finally:
        for stream in (co, ce):
            if stream is not None:
                stream.close()
    # and return!
    return blastallout
Example #9
0
def blastall_seq2db(header,sequence,dbname="",blastprogram="blastp",output="ncbiparsed",extra_blastp_params={'F': 'F', 'e': '10'}):
    """
    Blast a single sequence against a database.

    The sequence is written to a temporary fasta file in the current working
    directory, used as blastall query (`-i`) and removed afterwards.

    @type  header: string
    @param header: fasta header of the query; also used in the file name

    @type  sequence: string
    @param sequence: query sequence; its first 10 characters are used in
                     the temporary file name

    @type  dbname: string
    @param dbname: database name, passed to blastall as `-d`

    @type  blastprogram: string
    @param blastprogram: one of 'blastp', 'tblastn', 'blastn', 'blastx'

    @type  output: string
    @param output: "ncbiparsed" returns the result of
                   NCBIStandalone.BlastParser().parse(); any other value
                   returns the raw text read from blastall's STDOUT

    @type  extra_blastp_params: dict
    @param extra_blastp_params: extra options; each key/value pair is added
                                to the command line as `-key value`

    @attention: requires global variables BLASTALL_PATH, osPopen3, osRemove,
                osPathJoin, OSgetcwd, NCBIStandalone and get_random_string_tag

    @rtype:  parsed blast record, string or False
    @return: the blastall output, or False when running blastall or parsing
             its output failed

    @raise ValueError: `blastprogram` is not one of the supported programs
    """
    supported = ('blastp', 'tblastn', 'blastn', 'blastx')
    if blastprogram not in supported:
        raise ValueError("unsupported blastprogram '%s'; choose one of: %s" % (
            blastprogram, ", ".join(supported)))

    # the default dictionary is only read, never modified
    extra_params = " ".join(["-%s %s" % (k, v) for k, v in extra_blastp_params.items()])
    # generate (semi ;-) unique filename
    uniquetag = get_random_string_tag()
    fname = "_".join([uniquetag, str(header).replace(" ", "_"), sequence[0:10] + ".fa"])
    fname = osPathJoin(OSgetcwd(), fname)
    fh = open(fname, 'w')
    try:
        fh.write(">%s\n%s\n" % (header, sequence))
    finally:
        fh.close()
    command = "%s -p %s %s -i %s -d %s " % (BLASTALL_PATH, blastprogram, extra_params, fname, dbname)
    # pre-bind the pipes so the cleanup is safe when osPopen3 itself fails
    co = ce = None
    try:
        ci, co, ce = osPopen3(command)
        ci.close()
        if output == "ncbiparsed":
            blastallout = NCBIStandalone.BlastParser().parse(co)
        else:
            blastallout = co.read()
    except Exception:
        # for some kind of - obvious or freak accident case -
        # Blast or parsing of the blast record failed
        # No debugging here; just cleanup and return False
        # (KeyboardInterrupt / SystemExit are deliberately not swallowed)
        print("BLAST CRASHED::")
        print(command)
        blastallout = False
    finally:
        for stream in (co, ce):
            if stream is not None:
                stream.close()
        # remove the created Query file
        osRemove(fname)
    # and return!
    return blastallout
Example #10
0
def runcexpander(fname_fasta, cbalignp_commandline=" -y", output='binary'):
    """
    Run the complete cascade of cexpander algorithms on an input multi fasta
    file and return the parsed output

    @type  fname_fasta: string
    @param fname_fasta: path to input multi fasta file

    @type  cbalignp_commandline: string
    @param cbalignp_commandline: (extra) command line for cbalignp

    @type  output: string
    @param output: "float" puts `$dumpcvc` in the settings file; any other
                   value (default 'binary') puts `$dumpcv` in it

    @attention: requires global variable EXECUTABLE_CEXPANDER_ALLVSALL
    @attention: requires global variable EXECUTABLE_CEXPANDER_CBALIGNP
    @attention: requires global variable EXECUTABLE_CEXPANDER_CEXPANDER
    @attention: the input file `fname_fasta` itself is deleted during
                cleanup, together with all intermediate files

    @rtype:  CexpanderOutput object
    @return: the result of parse_cexpander() on the collected STDOUT

    @raise ValueError: `fname_fasta` is empty
    @raise IOError: `fname_fasta` is not an existing file
    """
    if not fname_fasta:
        raise ValueError("NoProperFunctionArguments")
    if not osPathIsfile(fname_fasta):
        raise IOError("FileDoesNotExist: %s" % fname_fasta)

    # (0) create (~unique) filenames
    uniquetag = get_random_string_tag()
    fname_allvsall = ".".join([fname_fasta, uniquetag, "allvsall"])
    fname_report = ".".join([fname_fasta, uniquetag, "report"])
    fname_aligned = ".".join([fname_fasta, uniquetag, "aligned"])
    fname_settings = ".".join([fname_fasta, uniquetag, "settings"])

    # (1) create complete .fa -> cexpanderstring command
    command = """
        python %s %s %s %s;
        %s -i %s %s > %s;
        %s < %s;
        """ % (
        EXECUTABLE_CEXPANDER_ALLVSALL,
        fname_fasta,
        fname_allvsall,
        fname_report,
        EXECUTABLE_CEXPANDER_CBALIGNP,
        fname_allvsall,
        cbalignp_commandline,
        fname_aligned,
        EXECUTABLE_CEXPANDER_CEXPANDER,
        fname_settings,
    )

    # (2) create fname_settings file
    binorfloat = "$dumpcv"
    if output == "float":
        binorfloat = "$dumpcvc"
    content = "\n\n".join([
        "$load\n%s\n%s" % (fname_report, fname_aligned),
        "$addquery\n-1",
        "$run",
        "$dumpentries",
        "$cv_linear",
        "%s" % (binorfloat),  # BINARY == $dumpcv, FLOAT = $dumpcvc
        "$exit\n\n",
    ])
    fh = open(fname_settings, 'w')
    try:
        fh.write(content)
    finally:
        fh.close()

    # (3) run the command
    ci, co, ce = osPopen3(command)
    ci.close()
    # output of EXECUTABLE_CEXPANDER_ALLVSALL is cast to STDOUT as well!
    cexpanderdata = co.read()
    co.close()
    # STDERR is read to EOF but its content is not used
    ce.read()
    ce.close()

    # (4) parse the collected STDOUT to CexpanderOutput object
    cxpdr = parse_cexpander(cexpanderdata, fname_fasta)

    # (5) cleanup files (this includes the input fasta file!)
    osSystem("rm -f %s %s.%s.*" % (fname_fasta, fname_fasta, uniquetag))

    # (6) return the output object
    return cxpdr
Example #11
0
def blastall_seq2seq(fastadata=(),
                     filenames=(),
                     output="ncbiparsed",
                     blastprogram="blastp",
                     remove_files=True,
                     extra_blastp_params={
                         'F': 'F',
                         'e': '10'
                     }):
    """
    Blast one query sequence against one subject sequence.

    choose proper input (exactly one of the two):
    fastadata   ( ( headerQUERY, seqQUERY ) , ( headerSBJCT, seqSBJCT ) )
     or
    filenames   ( filenameQUERY, filenameSBJCT )

    @type  fastadata: tuple
    @param fastadata: 2 (header, sequence) pairs; written to fasta files
                      in the current working directory

    @type  filenames: tuple
    @param filenames: 2 existing file names (query, subject)

    @type  output: string
    @param output: "ncbiparsed" returns the result of
                   NCBIStandalone.BlastParser().parse(); any other value
                   returns the raw text read from blastall's STDOUT

    @type  blastprogram: string
    @param blastprogram: one of 'blastp', 'tblastn', 'tblastx', 'blastx'

    @type  remove_files: Boolean
    @param remove_files: after a successful run, delete the query file, the
                         subject file and every `<subject file>.*` file

    @type  extra_blastp_params: dict
    @param extra_blastp_params: extra options; each key/value pair is added
                                to the blastall command line as `-key value`

    @attention: with remove_files=True, files passed in via `filenames`
                are deleted as well
    @attention: requires global variables FORMATDB_PATH, BLASTALL_PATH,
                OSsystem, osPopen3, osRemove, NCBIStandalone and
                get_random_string_tag

    @rtype:  parsed blast record or string, depending on `output`
    @return: the blastall output

    @raise ValueError: unsupported `blastprogram`, no input, or improper input
    """
    supported = ('blastp', 'tblastn', 'tblastx', 'blastx')
    if blastprogram not in supported:
        raise ValueError("unsupported blastprogram '%s'; choose one of: %s" % (
            blastprogram, ", ".join(supported)))
    # value for formatdb's -p flag: "F" for tblastn / tblastx, "T" otherwise
    if blastprogram in ('tblastn', 'tblastx'):
        dna_or_prot = "F"
    else:
        dna_or_prot = "T"

    # exactly one of fastadata / filenames must be given, as a 2-tuple
    if not fastadata and not filenames:
        raise ValueError("no input!")
    if fastadata and filenames:
        raise ValueError("inproper input!")
    given = fastadata or filenames
    if not isinstance(given, tuple) or len(given) != 2:
        raise ValueError("inproper input!")

    if fastadata:
        # input is fasta headers and sequences: write them to files
        uniquetag = get_random_string_tag()
        fname_q = "_".join([uniquetag, str(fastadata[0][0]), 'Q.fa'])
        fname_s = "_".join([uniquetag, str(fastadata[1][0]), 'S.fa'])
        for path, record in ((fname_q, fastadata[0]), (fname_s, fastadata[1])):
            fh = open(path, 'w')
            try:
                fh.write(">%s\n%s" % (record[0], record[1]))
            finally:
                fh.close()
    else:
        # input is (supposed to be) filenames
        fname_q = filenames[0]
        fname_s = filenames[1]

    # formatdb
    OSsystem("%s -i %s -p %s" % (FORMATDB_PATH, fname_s, dna_or_prot))
    # and blastall!
    # the default dictionary is only read, never modified
    extra_params = " ".join(
        ["-%s %s" % (k, v) for k, v in extra_blastp_params.items()])
    ci, co, ce = osPopen3(
        "%s -p %s %s -i %s -d %s " %
        (BLASTALL_PATH, blastprogram, extra_params, fname_q, fname_s))
    try:
        ci.close()
        if output == "ncbiparsed":
            blastallout = NCBIStandalone.BlastParser().parse(co)
        else:
            blastallout = co.read()
    finally:
        # release the pipes even when reading / parsing fails
        co.close()
        ce.close()
    if remove_files:
        OSsystem("rm %s.*" % fname_s)
        osRemove("%s" % fname_s)
        osRemove("%s" % fname_q)
    # and return!
    return blastallout
Example #12
0
def runcexpander(fname_fasta,cbalignp_commandline=" -y",output='binary'):
    """
    Run the complete cascade of cexpander algorithms on an input multi fasta
    file and return the parsed output

    @type  fname_fasta: string
    @param fname_fasta: path to input multi fasta file

    @type  cbalignp_commandline: string
    @param cbalignp_commandline: (extra) command line for cbalignp

    @type  output: string
    @param output: "float" puts `$dumpcvc` in the settings file; any other
                   value (default 'binary') puts `$dumpcv` in it

    @attention: requires global variable EXECUTABLE_CEXPANDER_ALLVSALL
    @attention: requires global variable EXECUTABLE_CEXPANDER_CBALIGNP
    @attention: requires global variable EXECUTABLE_CEXPANDER_CEXPANDER
    @attention: the input file `fname_fasta` itself is deleted during
                cleanup, together with all intermediate files

    @rtype:  CexpanderOutput object
    @return: the result of parse_cexpander() on the collected STDOUT

    @raise ValueError: `fname_fasta` is empty
    @raise IOError: `fname_fasta` is not an existing file
    """
    if not fname_fasta:
        raise ValueError("NoProperFunctionArguments")
    if not osPathIsfile(fname_fasta):
        raise IOError("FileDoesNotExist: %s" % fname_fasta)

    # (0) create (~unique) filenames
    uniquetag = get_random_string_tag()
    fname_allvsall  = ".".join([fname_fasta,uniquetag,"allvsall"])
    fname_report    = ".".join([fname_fasta,uniquetag,"report"])
    fname_aligned   = ".".join([fname_fasta,uniquetag,"aligned"])
    fname_settings  = ".".join([fname_fasta,uniquetag,"settings"])

    # (1) create complete .fa -> cexpanderstring command
    command = """
        python %s %s %s %s;
        %s -i %s %s > %s;
        %s < %s;
        """ % (
        EXECUTABLE_CEXPANDER_ALLVSALL,
        fname_fasta,
        fname_allvsall,
        fname_report,
        EXECUTABLE_CEXPANDER_CBALIGNP,
        fname_allvsall,
        cbalignp_commandline,
        fname_aligned,
        EXECUTABLE_CEXPANDER_CEXPANDER,
        fname_settings,
        )

    # (2) create fname_settings file
    binorfloat = "$dumpcv"
    if output == "float":
        binorfloat = "$dumpcvc"
    content = "\n\n".join( [
        "$load\n%s\n%s" % (fname_report,fname_aligned),
        "$addquery\n-1",
        "$run",
        "$dumpentries",
        "$cv_linear",
        "%s" % ( binorfloat ), # BINARY == $dumpcv, FLOAT = $dumpcvc
        "$exit\n\n",
        ] )
    fh = open(fname_settings,'w')
    try:
        fh.write(content)
    finally:
        fh.close()

    # (3) run the command
    ci,co,ce = osPopen3(command)
    ci.close()
    # output of EXECUTABLE_CEXPANDER_ALLVSALL is cast to STDOUT as well!
    cexpanderdata = co.read()
    co.close()
    # STDERR is read to EOF but its content is not used
    ce.read()
    ce.close()

    # (4) parse the collected STDOUT to CexpanderOutput object
    cxpdr = parse_cexpander(cexpanderdata,fname_fasta)

    # (5) cleanup files (this includes the input fasta file!)
    osSystem("rm -f %s %s.%s.*" % ( fname_fasta, fname_fasta,uniquetag ) )

    # (6) return the output object
    return cxpdr