Example #1
0
def infoseq(file):
    """
    Run the EMBOSS ``infoseq`` program on a sequence file.

    The input is validated with util.checkFile and the availability of the
    ``infoseq`` executable with util.checkSoft before the command is run.
    The command is invoked with ``-only -length -noheading`` and its output
    is directed to ``<file>.infoseq``.

    :param file: path of the sequence file to inspect
    """
    util.checkFile(file)
    util.checkSoft("infoseq")
    # The same path is used twice: once as input, once as output prefix.
    util.runProcess(
        "infoseq -only -length -noheading %s -outfile %s.infoseq" % (file, file)
    )
Example #2
0
def union(file, common_name, locus_tag, organism_name, strain):
    """
    Merge the scaffolds of a sequence file into a single sequence.

    The number of lines containing '>' is counted; when more than one is
    found, EMBOSS ``union`` is piped into ``descseq`` to write one renamed
    sequence to ``<common_name>.fsa``.

    :param file: path of the input sequence file
    :param common_name: used as the basename of the merged output file
    :param locus_tag: first token of the merged sequence name
    :param organism_name: value placed in the ``[organism=...]`` qualifier
    :param strain: value placed in the ``[strain=...]`` qualifier
    :return: path of the merged file, or ``file`` itself when it holds at
        most one '>' line
    """
    util.checkFile(file)
    # presumably util.runProcess returns the command output; it is parsed as
    # an integer below.
    header_count = int(util.runProcess("grep '>' %s | wc -l" % file))
    if header_count <= 1:
        # Nothing to merge.
        return file

    merged_path = "%s.fsa" % common_name
    for tool in ("union", "descseq"):
        util.checkSoft(tool)
    description = "%s [organism=%s] [strain=%s] [gcode=11]" % (locus_tag, organism_name, strain)
    pipeline = "union -sequence %s -stdout Yes -auto Yes | descseq -filter Yes -name '%s' -auto Yes > %s" % (file, description, merged_path)
    util.runProcess(pipeline)
    return merged_path
Example #3
0
def checkValidInput(input_file, common_name):
    """
    Make sure the input FASTA file holds a single sequence.

    RAST re-arranges the scaffolds if a split sequence is submitted, so when
    more than one '>' line is found the records are merged with EMBOSS
    ``union`` and renamed with ``descseq`` into ``<common_name>.fna``.

    :param input_file: path of the FASTA file to check
    :param common_name: name given to the merged sequence and basename of
        the merged file
    :return: path of the merged file, or ``input_file`` itself when it holds
        at most one '>' line
    """
    util.checkFile(input_file)
    # presumably util.runProcess returns the command output; it is parsed as
    # an integer below.
    header_count = int(util.runProcess("grep '>' %s | wc -l" % input_file))
    if header_count <= 1:
        # Already a single record: hand the original path back untouched.
        return input_file

    merged_path = "%s.fna" % common_name
    for tool in ("union", "descseq"):
        util.checkSoft(tool)
    # NOTE(review): if input_file is already named <common_name>.fna the
    # redirect below targets the file being read -- confirm callers avoid it.
    pipeline = "union -sequence %s -stdout Yes -auto Yes | descseq -filter Yes -name '%s' -auto Yes > %s" % (input_file, common_name, merged_path)
    util.runProcess(pipeline)
    return merged_path
Example #4
0
def doRun():
    """
    Command-line entry point: run Prodigal on one organism sequence file.

    Options:
        -o NAME  organism common name (used to name every output file)
        -i FILE  input organism sequence file in FASTA format

    The input is first passed through checkValidInput, then ``prodigal`` is
    run with the sequence on stdin and its output redirected to
    ``<name>.prodigal``. When that file exists it is converted with
    convertToTab and the intermediate files are removed.

    Any exception is logged with log.error and re-raised unchanged.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="NAME", help="organism common name", action="store", type="string", dest="name")
    parser.add_option(
        "-i",
        metavar="FILE",
        help="input organism sequence file in FASTA format",
        action="store",
        type="string",
        dest="input",
    )
    # Positional arguments are not used by this script.
    options, _ = parser.parse_args()

    try:
        common_name = options.name
        input_file = checkValidInput(options.input, common_name)
        output_file = "%s.prodigal" % common_name

        # Print info
        log.info("Running prodigal on %s\n" % common_name)
        log.info("Getting sequence from %s\n" % input_file)

        # Run prodigal
        softname = "prodigal"
        util.checkSoft(softname)
        cmd = "%s < %s > %s" % (softname, input_file, output_file)
        util.runProcess(cmd)

        # Run the conversion only if successful
        if os.path.exists(output_file):
            # Convert output results into a feature table EMBL file.
            tab_file = convertToTab(output_file, common_name)

            # Tidy up. Only remove the FASTA file when checkValidInput
            # produced a merged copy (it returns a different path in that
            # case); otherwise a user input that happens to be named
            # <name>.fna would be deleted.
            if input_file != options.input:
                util.rmFile(input_file)
            util.rmFile(output_file)

            log.info("%s is the final feature table Prodigal predictions\n" % tab_file)
        else:
            log.info("%s file does not exists\n" % output_file)
    except Exception as e:
        log.error(e)
        # Bare raise keeps the original traceback intact.
        raise
Example #5
0
def checkValidInput(input_file, common_name):
    """
    Check if the input fasta sequence file is of correct format.

    Glimmer hits a segmentation fault on split sequences in a fasta file, so
    when more than one '>' line is found the records are merged with EMBOSS
    ``union`` and renamed with ``descseq`` into ``<common_name>.fna``.

    :param input_file: path of the FASTA file to check
    :param common_name: name given to the merged sequence and basename of
        the merged file
    :return: path of the merged file, or ``input_file`` itself when it holds
        at most one '>' line
    :raises: any exception raised by the util helpers propagates unchanged
        (presumably util.UtilException -- confirm against util)
    """
    # No try/except here: the previous handler only re-raised the same
    # exception, which added nothing and reset the traceback.
    util.checkFile(input_file)
    cmd = "grep '>' %s | wc -l" % input_file
    result = util.runProcess(cmd)
    if int(result) <= 1:
        # Already a single record: nothing to merge.
        return input_file

    new_input_file = "%s.fna" % common_name
    util.checkSoft("union")
    util.checkSoft("descseq")
    cmd_union = "union -sequence %s -stdout Yes -auto Yes | descseq -filter Yes -name '%s' -auto Yes > %s" % (input_file, common_name, new_input_file)
    util.runProcess(cmd_union)
    return new_input_file