def silly_function(ui):
    """Write per-color subject lists and, optionally, per-subject files

    For every value yielded by ``ui.series(coloring_values)`` the distinct
    subjects found under that value are written, one per line, to
    ``color_by_<value>.txt`` inside ``output_path``. Unless
    ``suppress_trajectory_files`` is set, ``FILTER_CMD`` and then
    ``CONVERSION_CMD`` are executed for each of those subjects, using
    ``<subject>.txt`` inside ``output_path`` as the file name.

    ui: object exposing ``series(iterable)``; every loop in this function
    iterates over what that method returns

    Every other input (paths, header names, parsed mapping data, ``opts`` and
    the command templates) is read from the surrounding scope.
    """
    for c_value in ui.series(coloring_values):
        # release the mapping file handle on every iteration instead of
        # leaving it to the garbage collector
        # NOTE(review): assumes sample_ids_from_metadata_description fully
        # reads the file before returning -- confirm
        with open(mapping_fp, 'U') as mapping_f:
            sample_ids = sample_ids_from_metadata_description(
                mapping_f, '%s:%s' % (coloring_header_name, c_value))

        _headers, _data = filter_mapping_file(data, headers, sample_ids, True)
        per_color_subject_values = list(
            set([row[subject_index] for row in _data]))

        color_fp = join(output_path, 'color_by_' + c_value + '.txt')
        with open(color_fp, 'w') as fd:
            for s in ui.series(per_color_subject_values):
                fd.write('%s\n' % s)

        if suppress_trajectory_files:
            continue

        for s in ui.series(per_color_subject_values):
            filename = join(output_path, s + '.txt')

            if opts.verbose:
                print('Working on printing %s' % filename)

            command = FILTER_CMD % (coords_fp, mapping_fp,
                                    '%s:%s' % (subject_header_name, s),
                                    filename, sorting_category)
            o, e, r = qiime_system_call(command)
            # NOTE(review): the conversion step is only skipped after a
            # filtering error when running verbosely; in quiet mode the
            # conversion is attempted regardless -- confirm this is intended
            if opts.verbose and e:
                print('Error happened on filtering step: \n%s' % e)
                continue

            command = CONVERSION_CMD % (filename, filename)
            o, e, r = qiime_system_call(command)
            if opts.verbose and e:
                print('Error happened on conversion step: \n%s' % e)
def main():
    """Create job scripts from a commands file and optionally sbatch them

    Expects exactly two positional arguments: the path to a file with one
    command per line and a job prefix of 1-10 characters. The jobs directory
    (``opts.job_dir``) is created when missing. Job files are written via
    ``make_jobs`` and, when ``opts.submit_jobs`` is set, each one is submitted
    with ``sbatch``.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if opts.submit_jobs and not opts.make_jobs:
        option_parser.error('Must pass -m if passing -s. (Sorry about this, '
                            'it\'s for backwards-compatibility.)')

    min_args = 2
    if len(args) != min_args:
        option_parser.error('Program requires <commands file> and '
                            '<job prefix>')

    if (len(args[1]) > 10 or len(args[1]) == 0):
        option_parser.error('job prefix must be 1-10 characters long')

    if not exists(opts.job_dir):
        try:
            makedirs(opts.job_dir)
        except OSError:
            exit(" Jobs directory can not be created. "
                 "Check for permissions or file with the same name: %s\n"
                 % opts.job_dir)

    # read the commands eagerly so the file handle is closed right away
    with open(args[0]) as commands_f:
        commands = list(commands_f)
    job_prefix = args[1]

    if opts.mem_per_cpu:
        # sbatch spells this option with dashes, not underscores
        mem_per_cpu = " --mem-per-cpu=" + opts.mem_per_cpu
    else:
        mem_per_cpu = ""

    if opts.queue:
        queue = " -p " + opts.queue
    else:
        queue = ""

    if opts.make_jobs:
        filenames = make_jobs(
            commands,
            job_prefix,
            opts.queue,
            opts.job_dir)
    else:
        exit("Should we ever get here???")

    if opts.submit_jobs:
        for f in filenames:
            # the literal "%j" is passed through to sbatch as part of the
            # -o output file pattern
            qiime_system_call("".join([
                "sbatch",
                queue,
                " -J ", job_prefix,
                mem_per_cpu,
                " -o ", normpath(opts.job_dir), sep, job_prefix, "_%j.out",
                " ", f
            ]), shell=True)
def main():
    """Create job scripts from a commands file and optionally sbatch them

    Positional arguments are <commands file> (one command per line) and
    <job prefix> (1-10 characters). ``opts.job_dir`` is created when it does
    not exist, job files are produced by ``make_jobs`` and, if
    ``opts.submit_jobs`` is set, every job file is handed to ``sbatch``.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if opts.submit_jobs and not opts.make_jobs:
        option_parser.error('Must pass -m if passing -s. (Sorry about this, '
                            'it\'s for backwards-compatibility.)')

    min_args = 2
    if len(args) != min_args:
        option_parser.error('Program requires <commands file> and '
                            '<job prefix>')

    if (len(args[1]) > 10 or len(args[1]) == 0):
        option_parser.error('job prefix must be 1-10 characters long')

    if not exists(opts.job_dir):
        try:
            makedirs(opts.job_dir)
        except OSError:
            exit(" Jobs directory can not be created. "
                 "Check for permissions or file with the same name: %s\n"
                 % opts.job_dir)

    # close the commands file as soon as its lines have been read
    with open(args[0]) as commands_f:
        commands = list(commands_f)
    job_prefix = args[1]

    mem_per_cpu = ""
    if opts.mem_per_cpu:
        mem_per_cpu = " --mem-per-cpu=" + opts.mem_per_cpu

    queue = ""
    if opts.queue:
        queue = " -p " + opts.queue

    if opts.make_jobs:
        filenames = make_jobs(
            commands,
            job_prefix,
            opts.queue,
            opts.job_dir)
    else:
        exit("Should we ever get here???")

    if opts.submit_jobs:
        for f in filenames:
            # the literal "%j" is passed through to sbatch as part of the
            # -o output file pattern
            qiime_system_call("".join([
                "sbatch",
                queue,
                " -J ", job_prefix,
                mem_per_cpu,
                " -o ", normpath(opts.job_dir), sep, job_prefix, "_%j.out",
                " ", f
            ]), shell=True)
Example #4
0
def call_commands_serially(commands,
                           status_update_callback,
                           logger,
                           close_logger_on_success=True):
    """Run list of commands, one after another

    commands: iterable of command groups; every group is an iterable of
     (step description, command string) tuples
    status_update_callback: called with the description and the command,
     joined by a newline, before each command is run
    logger: object providing write() and close()
    close_logger_on_success: if True, close the logger once every command
     finished with exit status 0

    Raises WorkflowError on the first command with a nonzero exit status;
    the error is written to the logger and the logger is closed first.
    """
    logger.write("Executing commands.\n\n")
    for c in commands:
        for e in c:
            status_update_callback('%s\n%s' % e)
            logger.write('# %s command \n%s\n\n' % e)
            stdout, stderr, return_value = qiime_system_call(e[1])
            if return_value != 0:
                msg = "\n\n*** ERROR RAISED DURING STEP: %s\n" % e[0] +\
                    "Command run was:\n %s\n" % e[1] +\
                    "Command returned exit status: %d\n" % return_value +\
                    "Stdout:\n%s\nStderr\n%s\n" % (stdout, stderr)
                logger.write(msg)
                logger.close()
                raise WorkflowError(msg)

            # in the no error case, we write commands' output to the log
            # and also echo to this proc's stdout/stderr
            logger.write("Stdout:\n%s\nStderr:\n%s\n" % (stdout, stderr))
            if stdout:
                # single-argument form prints the same on python 2 and 3
                print(stdout)
            if stderr:
                sys.stderr.write(stderr)
    if close_logger_on_success:
        logger.close()
Example #5
0
def generate_random_password(min_len=8, max_len=12):
    """Create a random alphanumeric password and its htpasswd encryption.

    The password length is drawn at random from min_len..max_len (both
    bounds included). The encrypted form comes from running Apache's
    htpasswd command with its MD5 option.

    Returns a (plain password, encrypted password) tuple.

    Raises ValueError if the htpasswd command exits with a nonzero status.
    """
    # Adapted from
    # http://code.activestate.com/recipes/59873-random-password-generation
    alphabet = letters + digits
    n_chars = randint(min_len, max_len)
    password = ''.join(choice(alphabet) for _ in range(n_chars))

    # htpasswd needs a user name; a throwaway one is supplied and stripped
    # from the output below
    htpasswd_cmd = 'htpasswd -nbm foobarbaz %s' % password
    out, err, status = qiime_system_call(htpasswd_cmd)
    if status != 0:
        raise ValueError("Error executing htpasswd command. Do you have this "
                         "command on your machine?")

    # output has the form foobarbaz:<encrypted password>
    pieces = out.strip().split('foobarbaz:', 1)
    return password, pieces[1]
    def test_mothur_supported_version(self):
        """mothur is in path and version is supported """
        acceptable_version = (1, 25, 0)
        self.assertTrue(
            which('mothur'),
            "mothur not found. This may or may not be a problem depending on "
            + "which components of QIIME you plan to use.")
        # point mothur's log file at the QIIME temp dir (via set.logfile) so
        # it can be removed once the version line has been captured
        log_file = join(get_qiime_temp_dir(), 'mothur.log')
        command = "mothur \"#set.logfile(name=%s)\" | grep '^mothur v'" % log_file
        stdout, stderr, exit_Status = qiime_system_call(command)

        # remove log file
        remove_files([log_file], error_on_missing=False)

        try:
            # IndexError covers empty/unexpected output (e.g. grep matched
            # nothing); report it as an unsupported version instead of
            # erroring out of the test
            version_string = stdout.strip().split(' ')[1].strip('v.')
            version = tuple(map(int, version_string.split('.')))
            pass_test = version == acceptable_version
        except (IndexError, ValueError):
            pass_test = False
            version_string = stdout
        self.assertTrue(
            pass_test,
            "Unsupported mothur version. %s is required, but running %s." %
            ('.'.join(map(str, acceptable_version)), version_string))
Example #7
0
def copy_support_files(file_path):
    """Copy the support files to a named destination

    file_path: path where you want the support files to be copied to; the
    files end up in an 'emperor_required_resources' folder inside it, which
    is created when missing

    Will raise EmperorSupportFilesError if a problem is found whilst trying to
    copy the files (i.e. the copy command wrote anything to stderr).
    """
    file_path = join(file_path, 'emperor_required_resources')

    if not exists(file_path):
        create_dir(file_path, False)

    # shutil.copytree does not provide an easy way to copy the contents of a
    # directory into another existing directory, hence the system call.
    # use double quotes for the paths to escape any invalid character(s)/spaces
    cmd = 'cp -R "%s/"* "%s"' % (get_emperor_support_files_dir(),
                                 abspath(file_path))
    cmd_o, cmd_e, cmd_r = qiime_system_call(cmd)

    if cmd_e:
        # call-style raise is valid on both python 2 and python 3
        raise EmperorSupportFilesError(
            "Error found whilst trying to copy the support files:\n%s\n"
            " Could not execute: %s" % (cmd_e, cmd))
Example #8
0
def _iter_ids_over_system_call(cmd_fmt, sample_ids, opts):
    """Iteratively execute a system call over sample IDs

    Parameters
    ----------
    cmd_fmt : str
        The format of the command to execute. It is expected that there
        is a single string format to be done and it should take a sample
        ID
    sample_ids : Iterable of str
        A list of sample IDs of interest

    Returns
    -------
    dict
        A dict containing each sample ID and any errors observed or None if
        no error was observed for the sample. {str: str or None}
    """
    results = {}

    for id_ in sample_ids:
        cmd = cmd_fmt % {'result_path': _result_path(opts, id_), 'id': id_}
        stdout, stderr, return_value = qiime_system_call(cmd)

        if return_value != 0:
            msg = stderr.splitlines()
            results[id_] = 'FAILED (%s): %s' % (msg[-1] if msg else '', cmd)
        else:
            results[id_] = None

    return results
Example #9
0
def call_commands_serially(commands,
                           status_update_callback,
                           logger,
                           close_logger_on_success=True):
    """Run list of commands, one after another

    commands: iterable of command groups; every group is an iterable of
     (step description, command string) tuples
    status_update_callback: called with the description and the command,
     joined by a newline, before each command is run
    logger: object providing write() and close()
    close_logger_on_success: if True, close the logger once every command
     finished with exit status 0

    Raises WorkflowError on the first command with a nonzero exit status;
    the error is written to the logger and the logger is closed first.
    """
    logger.write("Executing commands.\n\n")
    for c in commands:
        for e in c:
            status_update_callback('%s\n%s' % e)
            logger.write('# %s command \n%s\n\n' % e)
            stdout, stderr, return_value = qiime_system_call(e[1])
            if return_value != 0:
                msg = "\n\n*** ERROR RAISED DURING STEP: %s\n" % e[0] +\
                    "Command run was:\n %s\n" % e[1] +\
                    "Command returned exit status: %d\n" % return_value +\
                    "Stdout:\n%s\nStderr\n%s\n" % (stdout, stderr)
                logger.write(msg)
                logger.close()
                # call-style raise is valid on both python 2 and python 3
                raise WorkflowError(msg)

            # in the no error case, we write commands' output to the log
            # and also echo to this proc's stdout/stderr
            logger.write("Stdout:\n%s\nStderr:\n%s\n" % (stdout, stderr))
            if stdout:
                # single-argument form prints the same on python 2 and 3
                print(stdout)
            if stderr:
                sys.stderr.write(stderr)
    if close_logger_on_success:
        logger.close()
Example #10
0
    def test_mothur_supported_version(self):
        """mothur is in path and version is supported """
        acceptable_version = (1, 25, 0)
        self.assertTrue(
            which("mothur"),
            "mothur not found. This may or may not be a problem depending on "
            + "which components of QIIME you plan to use.",
        )
        # point mothur's log file at the QIIME temp dir (via set.logfile) so
        # it can be removed once the version line has been captured
        log_file = join(get_qiime_temp_dir(), "mothur.log")
        command = "mothur \"#set.logfile(name=%s)\" | grep '^mothur v'" % log_file
        stdout, stderr, exit_Status = qiime_system_call(command)

        # remove log file
        remove_files([log_file], error_on_missing=False)

        try:
            # IndexError covers empty/unexpected output (e.g. grep matched
            # nothing); report it as an unsupported version instead of
            # erroring out of the test
            version_string = stdout.strip().split(" ")[1].strip("v.")
            version = tuple(map(int, version_string.split(".")))
            pass_test = version == acceptable_version
        except (IndexError, ValueError):
            pass_test = False
            version_string = stdout
        self.assertTrue(
            pass_test,
            "Unsupported mothur version. %s is required, but running %s."
            % (".".join(map(str, acceptable_version)), version_string),
        )
Example #11
0
def copy_support_files(file_path):
    """Copy the support files to a named destination

    file_path: path where you want the support files to be copied to; the
    files end up in an 'emperor_required_resources' folder inside it, which
    is created when missing

    Will raise EmperorSupportFilesError if a problem is found whilst trying to
    copy the files (i.e. the copy command wrote anything to stderr).
    """
    file_path = join(file_path, "emperor_required_resources")

    if not exists(file_path):
        create_dir(file_path, False)

    # shutil.copytree does not provide an easy way to copy the contents of a
    # directory into another existing directory, hence the system call.
    # use double quotes for the paths to escape any invalid character(s)/spaces
    cmd = 'cp -R "%s/"* "%s"' % (get_emperor_support_files_dir(), abspath(file_path))
    cmd_o, cmd_e, cmd_r = qiime_system_call(cmd)

    if cmd_e:
        # call-style raise is valid on both python 2 and python 3
        raise EmperorSupportFilesError(
            "Error found whilst trying to copy the support files:\n%s\n"
            " Could not execute: %s" % (cmd_e, cmd)
        )
Example #12
0
def _iter_ids_over_system_call(cmd_fmt, sample_ids, opts):
    """Iteratively execute a system call over sample IDs

    Parameters
    ----------
    cmd_fmt : str
        The format of the command to execute. It is expected that there
        is a single string format to be done and it should take a sample
        ID
    sample_ids : Iterable of str
        A list of sample IDs of interest

    Returns
    -------
    dict
        A dict containing each sample ID and any errors observed or None if
        no error was observed for the sample. {str: str or None}
    """
    results = {}

    for id_ in sample_ids:
        cmd = cmd_fmt % {'result_path': _result_path(opts, id_),
                         'id': id_}
        stdout, stderr, return_value = qiime_system_call(cmd)

        if return_value != 0:
            msg = stderr.splitlines()
            results[id_] = 'FAILED (%s): %s' % (msg[-1] if msg else '', cmd)
        else:
            results[id_] = None

    return results
Example #13
0
def run_command(cmd):
    """Run cmd via qiime_system_call, raising if it does not exit with 0

    Raises ExternalCommandFailedError carrying the command, its exit status
    and the captured stdout/stderr.
    """
    out, err, status = qiime_system_call(cmd)

    if status == 0:
        return

    details = (cmd, status, out, err)
    raise ExternalCommandFailedError("The command '%s' failed with exit "
                                     "status %d.\n\nStdout:\n\n%s\n\n"
                                     "Stderr:\n\n%s\n" % details)
Example #14
0
def make_line_plot(
        dir_path, data_file_link, background_color, label_color, xy_coords,
        props, x_len=8, y_len=4, draw_axes=False, generate_eps=True):
    """ Write a line plot

    dir_path: directory where scree_plot.png (and the gzipped eps) is saved
    data_file_link: path prefix used to build the returned links
    background_color: face color of the saved png
    label_color: color of the title, axis labels and axes edges
    xy_coords: a dict of form
       {series_label:([x data], [y data], point_marker, color)}
    props: dict optionally holding 'title', 'xlabel' and 'ylabel'
    x_len, y_len: figure size passed to plt.subplots
    draw_axes: not used by this function
    generate_eps: if True, also save an eps version and gzip it

    Returns (link to the png, download link for the gzipped eps); the second
    item is an empty string when generate_eps is False.

    (code adapted from Micah Hamady's code)
    """
    rc('font', size='8')
    rc('axes', linewidth=.5, edgecolor=label_color)
    rc('axes', labelsize=8)
    rc('xtick', labelsize=8)
    rc('ytick', labelsize=8)
    fig, ax = plt.subplots(figsize=(x_len, y_len))
    mtitle = props.get("title", "Groups")
    x_label = props.get("xlabel", "X")
    y_label = props.get("ylabel", "Y")

    ax.set_title('%s' % mtitle, fontsize='10', color=label_color)
    ax.set_xlabel(x_label, fontsize='8', color=label_color)
    ax.set_ylabel(y_label, fontsize='8', color=label_color)

    # plot the series in sorted label order
    for s_label in sorted(xy_coords.keys()):
        s_data = xy_coords[s_label]
        c = s_data[3]
        m = s_data[2]
        ax.plot(s_data[0], s_data[1], c=c, marker=m, label=s_label,
                linewidth=.1, ms=5, alpha=1.0)

    fp = FontProperties()
    fp.set_size('8')
    ax.legend(prop=fp, loc=0)

    img_name = 'scree_plot.png'
    fig.savefig(
        os.path.join(dir_path,
                     img_name),
        dpi=80,
        facecolor=background_color)

    # Create zipped eps files
    eps_link = ""
    if generate_eps:
        eps_img_name = str('scree_plot.eps')
        fig.savefig(os.path.join(dir_path, eps_img_name), format='eps')
        out, err, retcode = qiime_system_call(
            "gzip -f " + os.path.join(dir_path, eps_img_name))
        eps_link = DOWNLOAD_LINK % ((os.path.join(data_file_link,
                                                  eps_img_name) +
                                     ".gz"), "Download Figure")

    # figures created through pyplot stay registered until closed; release
    # this one now that everything has been written to disk
    plt.close(fig)

    return os.path.join(data_file_link, img_name), eps_link
Example #15
0
def get_emperor_library_version():
    """Return the Emperor version, extended with branch@sha when available

    The current branch and HEAD commit are looked up with git inside the
    Emperor project directory; they are appended only if both outputs
    validate, otherwise just the version is returned.
    """
    project_dir = get_emperor_project_dir()
    version = emperor_library_version

    # both lookups share the same prefix; further information could be
    # retrieved following the same pattern
    rev_parse = "git --git-dir %s/.git rev-parse " % (project_dir)

    head_out, head_err, head_ret = qiime_system_call(rev_parse + "HEAD")
    sha = head_out.strip()

    ref_out, ref_err, ref_ret = qiime_system_call(
        rev_parse + "--abbrev-ref HEAD")
    branch = ref_out.strip()

    # fall back to the bare version unless both outputs look valid
    if not (is_valid_git_refname(branch) and is_valid_git_sha1(sha)):
        return "%s" % version

    return "%s, %s@%s" % (version, branch, sha[0:7])
Example #16
0
def submit_jobs(path_to_cluster_jobs, jobs_fp, job_prefix):
    """ Submit the jobs to the queue using cluster_jobs.py

    path_to_cluster_jobs: path to the cluster jobs script to execute
    jobs_fp: path to the file listing the jobs
    job_prefix: prefix passed along to the cluster jobs script

    Raises RuntimeError if the command returns a nonzero exit status.
    """
    cmd = '%s -ms %s %s' % (path_to_cluster_jobs, jobs_fp, job_prefix)
    stdout, stderr, return_value = qiime_system_call(cmd)
    if return_value != 0:
        msg = "\n\n*** Could not start parallel jobs. \n" +\
            "Command run was:\n %s\n" % cmd +\
            "Command returned exit status: %d\n" % return_value +\
            "Stdout:\n%s\nStderr\n%s\n" % (stdout, stderr)
        # call-style raise is valid on both python 2 and python 3
        raise RuntimeError(msg)
Example #17
0
def get_emperor_library_version():
    """Build the Emperor version string, adding git details when possible

    Asks git (run against the Emperor project directory) for the HEAD commit
    and the current branch name. When both validate the result is
    'version, branch@short-sha'; in any other case it is the version alone.
    """
    def _rev_parse(arguments):
        # run "git rev-parse" against the project's .git directory and
        # return its stripped stdout
        git_cmd = 'git --git-dir %s/.git rev-parse %s' % (base_dir, arguments)
        cmd_out, cmd_err, cmd_ret = qiime_system_call(git_cmd)
        return cmd_out.strip()

    base_dir = get_emperor_project_dir()
    library_version = emperor_library_version

    # more information could be retrieved following this pattern
    commit_sha = _rev_parse('HEAD')
    branch_name = _rev_parse('--abbrev-ref HEAD')

    # validate the output from both command calls
    if is_valid_git_refname(branch_name) and is_valid_git_sha1(commit_sha):
        return '%s, %s@%s' % (library_version, branch_name, commit_sha[0:7])
    return '%s' % library_version
def call_cmd(cmd, HALT_EXEC):
    """Run cmd, or only print it and exit when HALT_EXEC is set

    cmd: the command to execute
    HALT_EXEC: if true, print cmd and exit with status 0 without running it

    Returns cmd if it ran with exit status 0; otherwise stdout and stderr
    are printed and the process exits with status 1.
    """
    if HALT_EXEC:
        # single-argument form prints the same on python 2 and 3
        print(cmd)
        exit(0)
    else:
        stdout, stderr, exit_status = qiime_system_call(cmd)
        if exit_status != 0:
            print("indexdb_rna failed!\nSTDOUT\n%s\nSTDERR\n%s\n"
                  % (stdout, stderr))
            exit(1)
    return cmd
def run_commands(output_dir,commands,run_id,submit_jobs,keep_temp,queue_name):
    """Write job files for commands, optionally qsub them, then clean up

    output_dir, commands, run_id, queue_name: passed to write_job_files
    submit_jobs: if true, every written job file is submitted with qsub
    keep_temp: if true, the paths reported by write_job_files are kept
    """
    job_fps, paths_to_remove = write_job_files(output_dir,commands,run_id,
                                               queue_name)

    # submit every job file
    if submit_jobs:
        for job_fp in job_fps:
            qiime_system_call('qsub ' + job_fp)

    if keep_temp:
        return

    # clean up the shell scripts that were created
    for path in paths_to_remove:
        try:
            # works when path is a file
            remove(path)
        except OSError:
            # otherwise treat path as a directory
            rmtree(path)
Example #20
0
def run_commands(output_dir, commands, run_id, submit_jobs, keep_temp,
                 queue_name):
    """Create the job files, submit them on request and remove leftovers

    output_dir, commands, run_id, queue_name: forwarded to write_job_files
    submit_jobs: when true, each job file is handed to qsub
    keep_temp: when false, the paths returned by write_job_files for removal
     are deleted
    """
    def _discard(path):
        # try path as a file first and fall back to a directory tree
        try:
            remove(path)
        except OSError:
            rmtree(path)

    written = write_job_files(output_dir, commands, run_id, queue_name)
    job_fps, paths_to_remove = written

    # Call the jobs
    for job_fp in (job_fps if submit_jobs else ()):
        qiime_system_call('qsub ' + job_fp)

    # clean up the shell scripts that were created
    if not keep_temp:
        for stale_path in paths_to_remove:
            _discard(stale_path)
Example #21
0
    def _submit_jobs(self, jobs_fp, job_prefix):
        """ Hand the jobs file to cluster_jobs.py for submission

        jobs_fp: path to the file listing the jobs
        job_prefix: prefix passed along to the cluster jobs script

        Returns the (stdout, stderr, return value) of the call; raises
        RuntimeError when the return value is not 0.
        """
        cmd = '%s -ms %s %s' % (self._cluster_jobs_fp, jobs_fp, job_prefix)
        result = qiime_system_call(cmd)
        stdout, stderr, return_value = result

        if return_value != 0:
            report = [
                "\n\n*** Could not start parallel jobs. \n",
                "Command run was:\n %s\n" % cmd,
                "Command returned exit status: %d\n" % return_value,
                "Stdout:\n%s\nStderr\n%s\n" % (stdout, stderr),
            ]
            raise RuntimeError(''.join(report))

        # Leave these comments in as they're useful for debugging.
        # print 'Return value: %d\n' % return_value
        # print 'STDOUT: %s\n' % stdout
        # print 'STDERR: %s\n' % stderr

        return stdout, stderr, return_value
Example #22
0
    def _submit_jobs(self, jobs_fp, job_prefix):
        """ Submit the jobs to the queue using cluster_jobs.py

        jobs_fp: path to the file listing the jobs
        job_prefix: prefix passed along to the cluster jobs script

        Returns the (stdout, stderr, return value) of the call; raises
        RuntimeError when the return value is not 0.
        """
        cmd = "%s -ms %s %s" % (self._cluster_jobs_fp, jobs_fp, job_prefix)
        stdout, stderr, return_value = qiime_system_call(cmd)
        if return_value != 0:
            msg = (
                "\n\n*** Could not start parallel jobs. \n"
                + "Command run was:\n %s\n" % cmd
                + "Command returned exit status: %d\n" % return_value
                + "Stdout:\n%s\nStderr\n%s\n" % (stdout, stderr)
            )
            # call-style raise is valid on both python 2 and python 3
            raise RuntimeError(msg)

        # Leave these comments in as they're useful for debugging.
        # print 'Return value: %d\n' % return_value
        # print 'STDOUT: %s\n' % stdout
        # print 'STDERR: %s\n' % stderr

        return stdout, stderr, return_value
Example #23
0
def main():
    """Run the unit test files and print a summary of the results

    Test files are those named test_*.py, found either by walking the
    directory containing this file or by expanding opts.unittest_glob. Each
    one is executed with 'python <file> -v'; a file passes when its stderr
    ends with 'OK'.

    Returns 0 when no test file failed, 1 otherwise.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if (opts.suppress_unit_tests):
        option_parser.error("Tests, suppresed. Nothing to run.")

    test_dir = abspath(dirname(__file__))

    # raw string so that \s reaches the regex engine untouched
    unittest_good_pattern = re.compile(r'OK\s*$')
    application_not_found_pattern = re.compile('ApplicationNotFoundError')
    python_name = 'python'
    bad_tests = []
    missing_application_tests = []

    # Run through all of Evident's unit tests, and keep track of any files which
    # fail unit tests.
    if not opts.suppress_unit_tests:
        unittest_names = []
        if not opts.unittest_glob:
            for root, dirs, files in walk(test_dir):
                for name in files:
                    if name.startswith('test_') and name.endswith('.py'):
                        unittest_names.append(join(root, name))
        else:
            for fp in glob(opts.unittest_glob):
                fn = split(fp)[1]
                if fn.startswith('test_') and fn.endswith('.py'):
                    unittest_names.append(abspath(fp))

        unittest_names.sort()

        for unittest_name in unittest_names:
            print("Testing %s:\n" % unittest_name)
            command = '%s %s -v' % (python_name, unittest_name)
            stdout, stderr, return_value = qiime_system_call(command)
            print(stderr)
            if not unittest_good_pattern.search(stderr):
                # separate failures caused by missing external applications
                # from every other kind of failure
                if application_not_found_pattern.search(stderr):
                    missing_application_tests.append(unittest_name)
                else:
                    bad_tests.append(unittest_name)

    print("==============\nResult summary\n==============")

    if not opts.suppress_unit_tests:
        print("\nUnit test result summary\n------------------------\n")
        if bad_tests:
            print("\nFailed the following unit tests.\n%s"
                  % '\n'.join(bad_tests))

        if missing_application_tests:
            print("\nFailed the following unit tests, in part or whole due "
                  "to missing external applications.\nDepending on the "
                  "Evident features you plan to use, this may not be "
                  "critical.\n%s" % '\n'.join(missing_application_tests))

        if not (missing_application_tests or bad_tests):
            print("\nAll unit tests passed.\n\n")

    # In case there were no failures of any type, exit with a return code of 0
    if bad_tests or missing_application_tests:
        return 1
    return 0
def submit_fasta_and_split_lib(data_access,fasta_files,metadata_study_id,
                                  input_dir):
    """
        FASTA Loading: This function takes the fasta filenames, registers
        an analysis and a sequencing run for them and records, for every
        fasta file not yet known to the DB (by md5), its name, sequence
        count and md5.

        data_access: object providing the DB calls used below
        fasta_files: comma-separated string of fasta file paths
        metadata_study_id: study id used for the existence check and for
         creating the analysis
        input_dir: returned unchanged

        Returns (analysis_id, input_dir, seq_run_id, md5 of the split-library
        input).

        Raises ValueError if the analysis cannot be updated with the
        sequencing run id.
    """
    # get DB connection and cursor
    # NOTE(review): neither is used below; kept in case acquiring them has
    # side effects -- confirm and remove if not
    con = data_access.getSFFDatabaseConnection()
    cur = con.cursor()

    # check if study exists
    study_id_exists = data_access.checkIfStudyIdExists(metadata_study_id)
    print("Study ID exists: " + str(study_id_exists))

    # get fasta filenames
    fasta_filenames = fasta_files.split(',')
    seq_run_id = 0
    analysis_id = 0

    ### by disabling constraints you can speed up loading as well, but shouldn't
    ### be necessary
    #valid = data_access.disableTableConstraints()
    #print "Disabled table constraints"

    # split the fasta filenames and determine filepaths
    for fasta_fname in fasta_filenames:
        input_fname = splitext(split(fasta_fname)[-1])[0]

        # get md5 for raw fasta files; the handle is closed right after
        with open(fasta_fname) as fasta_f:
            fasta_md5 = safe_md5(fasta_f).hexdigest()
        print('MD5 is: %s' % str(fasta_md5))

        # create an analysis row in analysis table (only once)
        if analysis_id == 0:
            analysis_id = data_access.createAnalysis(metadata_study_id)

        # check if fasta info already loaded
        fasta_exists = data_access.checkIfSFFExists(fasta_md5)
        print('fasta in database? %s' % str(fasta_exists))

        # if fasta info not loaded, then insert into DB
        if not fasta_exists:
            if seq_run_id == 0:
                seq_run_id = data_access.createSequencingRun(True, 'FASTA',
                                                             None, seq_run_id)

            # get sequence count
            count_seqs_cmd = "grep '^>' %s | wc -l" % (fasta_fname)
            o, e, r = qiime_system_call(count_seqs_cmd)
            seq_counts = o.strip()

            # add fasta info
            valid = data_access.addSFFFileInfo(True, input_fname,
                                               seq_counts,
                                               None,
                                               None,
                                               None,
                                               None,
                                               None,
                                               None,
                                               fasta_md5, seq_run_id)
        else:
            seq_run_id = data_access.getSeqRunIDUsingMD5(fasta_md5)

    print('sequence_run_id is: %s' % str(seq_run_id))

    # get md5 sum for input to split-libraries
    split_lib_input_md5sum = safe_md5(MD5Wrap(fasta_filenames)).hexdigest()
    print(split_lib_input_md5sum)
    print('Finished loading the processed FASTA data!')
    print('Run ID: %s' % seq_run_id)
    print('Analysis ID: %s' % analysis_id)

    # update analysis table with seq_run_id
    valid = data_access.updateAnalysisWithSeqRunID(True, analysis_id,
                                                   seq_run_id)
    if not valid:
        raise ValueError(
            'Error: Unable to append SEQ_RUN_ID into ANALYSIS table!')

    return analysis_id, input_dir, seq_run_id, split_lib_input_md5sum
Example #25
0
def make_interactive_scatter(plot_label,
                             dir_path,
                             data_file_link,
                             background_color,
                             label_color,
                             sample_location,
                             alpha,
                             xy_coords,
                             props,
                             x_len=8,
                             y_len=4,
                             size=10,
                             draw_axes=False,
                             generate_eps=True):
    """Draw a scatter plot, save it and build the html pieces that use it

    xy_coords: a dict of form {series_label:([x data], [y data],
    [xy point label],[color])}
    props: dict optionally holding 'title', 'xlabel' and 'ylabel'

    The png (and, if generate_eps is set, a gzipped eps) is written to
    dir_path; links are built relative to data_file_link.

    Returns a tuple: (image html built from IMG_MAP_SRC, map html built
    from MAP_SRC, eps download link or an empty string).
    """
    rc('font', size='8')
    rc('patch', linewidth=0)
    rc('axes', linewidth=.5, edgecolor=label_color)
    rc('axes', labelsize=8)
    rc('xtick', labelsize=8, color=label_color)
    rc('ytick', labelsize=8, color=label_color)

    sc_plot = draw_scatterplot(props, xy_coords, x_len, y_len, size,
                               background_color, label_color, sample_location,
                               alpha)

    plot_title = props.get("title", "Groups")
    x_label = props.get("xlabel", "X")
    y_label = props.get("ylabel", "Y")

    title('%s' % plot_title, fontsize='10', color=label_color)
    xlabel(x_label, fontsize='8', color=label_color)
    ylabel(y_label, fontsize='8', color=label_color)

    show()

    if draw_axes:
        axvline(linewidth=.5, x=0, color=label_color)
        axhline(linewidth=.5, y=0, color=label_color)

    # file names are derived from the first three characters of each label
    x_tag = x_label[:3]
    y_tag = y_label[:3]

    img_name = x_tag + '_vs_' + y_tag + '_plot.png'
    png_fp = os.path.join(dir_path, img_name)
    savefig(png_fp, dpi=80, facecolor=background_color)

    # Create zipped eps files
    eps_link = ""
    if generate_eps:
        eps_img_name = str(x_tag + 'vs' + y_tag + 'plot.eps')
        eps_fp = os.path.join(dir_path, eps_img_name)
        savefig(eps_fp, format='eps')
        out, err, retcode = qiime_system_call("gzip -f " + eps_fp)
        gz_link = os.path.join(data_file_link, eps_img_name) + ".gz"
        eps_link = DOWNLOAD_LINK % (gz_link, "Download Figure")

    all_cids, all_xcoords, all_ycoords = transform_xy_coords(
        xy_coords, sc_plot)

    xmap, img_height, img_width = generate_xmap(x_len, y_len, all_cids,
                                                all_xcoords, all_ycoords)

    # the id combines the plot label with the third character of each label
    points_id = plot_label + x_label[2:3] + y_label[2:3]

    img_html = IMG_MAP_SRC % (os.path.join(data_file_link, img_name),
                              points_id, img_width, img_height)
    map_html = MAP_SRC % (points_id, ''.join(xmap))

    return img_html, map_html, eps_link
Example #26
0
def main():
    """Run QIIME's unit tests, basic script tests and script usage tests.

    Each of the three test types can be suppressed from the command line,
    but at least one has to remain enabled.  Unit tests are run one file at
    a time in a child interpreter; basic script tests call every script with
    ``-h`` and check that the output starts with its usage line; script usage
    tests are delegated to ``run_script_usage_tests``.  A summary is printed
    at the end.

    Returns
    -------
    int
        0 if nothing failed; 1 if any unit test, basic script test or script
        usage example failed, if a unit test reported a missing external
        application, or if script usage tests were requested but
        ``qiime_test_data_dir`` is not set in the QIIME config.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if (opts.suppress_unit_tests and opts.suppress_script_tests and
            opts.suppress_script_usage_tests):
        option_parser.error("You're suppressing all three test types. Nothing to run.")

    test_dir = abspath(dirname(__file__))

    unittest_good_pattern = re.compile(r"OK\s*$")
    application_not_found_pattern = re.compile("ApplicationNotFoundError")
    python_name = "python"
    bad_tests = []
    missing_application_tests = []

    # Run through all of QIIME's unit tests, and keep track of any files which
    # fail unit tests.
    if not opts.suppress_unit_tests:
        unittest_names = []
        if not opts.unittest_glob:
            for root, dirs, files in walk(test_dir):
                for name in files:
                    if name.startswith("test_") and name.endswith(".py"):
                        unittest_names.append(join(root, name))
        else:
            for fp in glob(opts.unittest_glob):
                fn = split(fp)[1]
                if fn.startswith("test_") and fn.endswith(".py"):
                    unittest_names.append(abspath(fp))

        unittest_names.sort()

        for unittest_name in unittest_names:
            print("Testing %s:\n" % unittest_name)
            command = "%s %s -v" % (python_name, unittest_name)
            stdout, stderr, return_value = qiime_system_call(command)
            print(stderr)
            # unittest writes its report to stderr; a passing run ends in "OK"
            if not unittest_good_pattern.search(stderr):
                if application_not_found_pattern.search(stderr):
                    missing_application_tests.append(unittest_name)
                else:
                    bad_tests.append(unittest_name)

    bad_scripts = []
    if not opts.suppress_script_tests:
        # Run through all of QIIME's scripts, and pass -h to each one. If the
        # resulting stdout does not begin with the Usage text, that is an
        # indicator of something being wrong with the script. Issues that would
        # cause that are bad import statements in the script, SyntaxErrors, or
        # other failures prior to running qiime.util.parse_command_line_parameters.

        try:
            scripts_dir = get_qiime_scripts_dir()
            script_directory_found = True
        except AssertionError:
            script_directory_found = False

        if script_directory_found:
            script_names = glob("%s/*py" % scripts_dir)
            script_names.sort()

            for script_name in script_names:
                # The file name is matched literally: escaping keeps the '.'
                # in "name.py" from matching an arbitrary character.
                script_good_pattern = re.compile(
                    "^Usage: %s" % re.escape(split(script_name)[1]))
                print("Testing %s." % script_name)
                command = "%s %s -h" % (python_name, script_name)
                stdout, stderr, return_value = qiime_system_call(command)
                if not script_good_pattern.search(stdout):
                    bad_scripts.append(script_name)

    num_script_usage_example_failures = 0
    qiime_test_data_dir = qiime_config["qiime_test_data_dir"]
    # Use the same "is it set?" check for running the usage tests and for
    # reporting on them, so an empty value is never passed on as a directory
    # while the summary claims that the tests could not be run.
    have_test_data_dir = bool(qiime_test_data_dir)
    if not opts.suppress_script_usage_tests and have_test_data_dir:
        # Run the script usage testing functionality
        script_usage_result_summary, num_script_usage_example_failures = run_script_usage_tests(
            qiime_test_data_dir=qiime_test_data_dir,
            qiime_scripts_dir=qiime_config["qiime_scripts_dir"],
            working_dir=qiime_config["temp_dir"],
            verbose=True,
            tests=None,  # runs all
            failure_log_fp=None,
            force_overwrite=True,
        )

    print("==============\nResult summary\n==============")

    if not opts.suppress_unit_tests:
        print("\nUnit test result summary\n------------------------\n")
        if bad_tests:
            print("\nFailed the following unit tests.\n%s" % "\n".join(bad_tests))

        if missing_application_tests:
            print("\nFailed the following unit tests, in part or whole due "
                  "to missing external applications.\nDepending on the QIIME features "
                  "you plan to use, this may not be critical.\n%s"
                  % "\n".join(missing_application_tests))

        if not (missing_application_tests or bad_tests):
            print("\nAll unit tests passed.\n\n")

    if not opts.suppress_script_tests:
        print("\nBasic script test result summary\n--------------------------------\n")
        if not script_directory_found:
            print("Critical error: Failed to test scripts because the script "
                  "directory could not be found.\n The most likely explanation "
                  "for this failure is that you've installed QIIME using "
                  "setup.py, and forgot to specify the qiime_scripts_dir in "
                  "your qiime_config file. This value shoud be set either to "
                  "the directory you provided for --install-scripts, or "
                  "/usr/local/bin if no value was provided to "
                  "--install-scripts.")
        else:
            if bad_scripts:
                print("Failed the following basic script tests.\n%s" % "\n".join(bad_scripts))
            else:
                print("All basic script tests passed successfully.\n")

    qiime_test_data_dir_exists = True
    if not opts.suppress_script_usage_tests:
        if have_test_data_dir:
            print("\nScript usage test result summary\n------------------------------------\n")
            print(script_usage_result_summary)
        else:
            print("\nCould not run script usage tests because qiime_test_data_dir is not defined in your qiime_config.")
            qiime_test_data_dir_exists = False
        print("")

    # If any of the unit tests, script tests, or script usage tests fail, or if
    # we have any missing application errors or a missing QIIME test data dir
    # if script usage tests weren't suppressed, use return code 1 (as python's
    # unittest module does to indicate one or more failures).
    everything_passed = (
        not bad_tests
        and not missing_application_tests
        and not bad_scripts
        and num_script_usage_example_failures == 0
        and qiime_test_data_dir_exists
    )
    return 0 if everything_passed else 1
Example #27
0
def create_personal_results(mapping_fp, 
                            distance_matrix_fp, 
                            collated_dir_fp, 
                            output_fp, prefs_fp, 
                            personal_id_field, 
                            personal_ids=None, 
                            column_title=None, 
                            individual_titles=None):
    """Build per-individual mapping files, index pages and plots.

    For every selected personal id a sub-directory of ``output_fp`` is
    created and filled with a personalised mapping file, an ``index.html``,
    rarefaction plots (``make_rarefaction_plots.py``) and 3D PCoA plots
    (``make_3d_plots.py``).

    mapping_fp: path to the mapping file
    distance_matrix_fp: passed as ``-i`` to make_3d_plots.py
    collated_dir_fp: passed as ``-i`` to make_rarefaction_plots.py
    output_fp: directory to create; it is created with ``makedirs``
    prefs_fp: preferences file passed as ``-p`` to both plotting scripts
    personal_id_field: mapping file column header holding the personal ids
    personal_ids: comma-separated string of ids to process; when None, the
        ids returned by ``get_personal_ids`` for the mapping file are used
    column_title: header of the column appended to the mapping file
        ('Self' when None)
    individual_titles: forwarded to ``create_personal_mapping_file``

    Returns the list of output directories (for each individual the beta
    diversity directory followed by the alpha rarefaction directory).

    Raises ValueError if ``personal_id_field`` is not a mapping file header,
    if a requested id is not in the mapping file, or if one of the plotting
    commands exits with a non-zero return code.
    """
    # close the mapping file as soon as it has been parsed
    with open(mapping_fp, 'U') as mapping_f:
        map_as_list, header, comments = parse_mapping_file(mapping_f)
    try:
        personal_id_index = header.index(personal_id_field)
    except ValueError:
        raise ValueError("personal id field (%s) is not a mapping file column header" %
                         personal_id_field)
    header.append('Self' if column_title is None else column_title)

    # look the known ids up once instead of once per requested id
    known_ids = get_personal_ids(map_as_list, personal_id_index)
    if personal_ids is None:
        personal_ids = known_ids
    else:
        personal_ids = personal_ids.split(',')
        for personal_id in personal_ids:
            if personal_id not in known_ids:
                raise ValueError('%s is not an id in the mapping file.' % personal_id)

    output_directories = []
    makedirs(output_fp)
    for person_of_interest in personal_ids:
        person_dir = join(output_fp, person_of_interest)
        makedirs(person_dir)
        pcoa_dir = join(person_dir, "beta_diversity")
        rarefaction_dir = join(person_dir, "alpha_rarefaction")
        output_directories.append(pcoa_dir)
        output_directories.append(rarefaction_dir)
        personal_mapping_file_fp = join(person_dir, "mapping_file.txt")
        html_fp = join(person_dir, "index.html")
        create_personal_mapping_file(map_as_list,
                                     header,
                                     comments,
                                     person_of_interest,
                                     personal_mapping_file_fp, 
                                     personal_id_index, 
                                     individual_titles)
        create_index_html(person_of_interest, html_fp)

        # rarefaction plots first, then the 3D plots; stop at the first
        # command that fails
        commands = [
            "make_rarefaction_plots.py -i %s -m %s -p %s -o %s" % (
                collated_dir_fp, personal_mapping_file_fp, prefs_fp,
                rarefaction_dir),
            "make_3d_plots.py -m %s -p %s -i %s -o %s" % (
                personal_mapping_file_fp, prefs_fp, distance_matrix_fp,
                pcoa_dir),
        ]
        for cmd in commands:
            stdout, stderr, return_code = qiime_system_call(cmd)
            if return_code != 0:
                raise ValueError("Command failed!\nCommand: %s\n Stdout: %s\n Stderr: %s\n" %
                                 (cmd, stdout, stderr))

    return output_directories
Example #28
0
def run_script_usage_tests(qiime_test_data_dir,
                           qiime_scripts_dir,
                           working_dir,
                           verbose=False,
                           tests=None,
                           failure_log_fp=None,
                           force_overwrite=False):
    """ Test script_usage examples when test data is present in qiime_test_data_dir
    
        These tests are currently used with the qiime_test_data repository, which can
        be found at https://github.com/qiime-dev/qiime_test_data

        qiime_test_data_dir: directory with one sub-directory of test data
         per script
        qiime_scripts_dir: directory containing the scripts; it is added to
         the import path so each script's script_info can be read
        working_dir: parent of the 'script_usage_tests' scratch directory,
         which is removed again before returning
        verbose: print progress information
        tests: names of the scripts to test (default: every sub-directory
         of qiime_test_data_dir)
        failure_log_fp: if provided, details of the failures are written
         to this file
        force_overwrite: remove a pre-existing scratch directory first

        Returns a result summary string and the number of script usage
        examples (i.e. commands) that failed.
    """
    # process input filepaths and directories
    qiime_test_data_dir = abspath(qiime_test_data_dir)
    working_dir = join(working_dir,'script_usage_tests')
    if force_overwrite and exists(working_dir):
        rmtree(working_dir)
    if failure_log_fp is not None:
        failure_log_fp = abspath(failure_log_fp)

    if tests is None:
        tests = [split(d)[1] for d in glob('%s/*' % qiime_test_data_dir) if isdir(d)]
    
    if verbose:
        print('Tests to run:\n %s' % ' '.join(tests))
    
    addsitedir(qiime_scripts_dir)
    
    failed_tests = []
    warnings = []
    total_tests = 0
    for test in tests:
        
        # import the usage examples - this is possible because we added 
        # qiime_scripts_dir to the PYTHONPATH above
        script_fn = '%s/%s.py' % (qiime_scripts_dir,test)
        script = __import__(test)
        usage_examples = script.script_info['script_usage']
        
        if verbose:
            print('Testing %d usage examples from: %s' % (len(usage_examples),script_fn))
        
        # init the test environment
        test_input_dir = '%s/%s' % (qiime_test_data_dir,test)
        test_working_dir = '%s/%s' % (working_dir,test)
        copytree(test_input_dir,test_working_dir)
        chdir(test_working_dir)
        
        # remove pre-existing output files if any
        script_usage_output_to_remove = script.script_info.get(
            'script_usage_output_to_remove', [])
        for e in script_usage_output_to_remove:
            rmtree(e.replace('$PWD',getcwd()),ignore_errors=True)
        
        if verbose:
            print(' Running tests in: %s' % getcwd())
            print(' Tests:')
        
        for usage_example in usage_examples:
            # the third element of a usage example is the command line
            if '%prog' not in usage_example[2]:
                warnings.append('%s usage examples do not all use %%prog to represent the command name. You may not be running the version of the command that you think you are!' % test)
            cmd = usage_example[2].replace('%prog',script_fn)
            if verbose:
                print('  %s' % cmd)
            stdout, stderr, return_value = qiime_system_call(cmd)
            total_tests += 1
            if return_value != 0:
                failed_tests.append((cmd, stdout, stderr, return_value))
        
        if verbose:
            print('')
            
    if failure_log_fp:
        with open(failure_log_fp,'w') as failure_log_f:
            if not failed_tests:
                failure_log_f.write('All script interface tests passed.\n')
            else:
                # number the failures so entries in the log can be told apart
                for i, (cmd, stdout, stderr, return_value) in enumerate(failed_tests):
                    failure_log_f.write('**Failed test %d:\n%s\n\nReturn value: %d\n\nStdout:\n%s\n\nStderr:\n%s\n\n' % (i,cmd,return_value, stdout, stderr))
    
    
    if warnings:
        print('Warnings:')
        for warning in warnings:
            print(' ' + warning)
        print('')
    
    result_summary = 'Ran %d commands to test %d scripts. %d of these commands failed.' % (total_tests,len(tests),len(failed_tests))
    if failed_tests:
        # the first token of each failed command is the script path
        failed_scripts = set([split(e[0].split()[0])[1] for e in failed_tests])
        result_summary += '\nFailed scripts were: %s' % " ".join(failed_scripts)
    if failure_log_fp:
        result_summary += "\nFailures are summarized in %s" % failure_log_fp
    
    rmtree(working_dir)
    
    return result_summary, len(failed_tests)
def main():
    """Plot PC1 vs. PC2 with a convex hull drawn around each category.

    The samples are grouped by the values of the requested mapping file
    category.  For each group with at least three samples, the points and
    their convex hull are drawn in the group's QIIME colour.  The figure is
    saved to ``opts.output_fp`` and a legend is generated with
    ``make_legend``.

    With the ``chuck_norris_joke`` option a joke is downloaded, printed, and
    the script exits without plotting.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    chuck_norris_joke = opts.chuck_norris_joke
    coordinates_fp = opts.coordinates_fp
    mapping_file_fp = opts.mapping_file_fp
    category_header_name = opts.category
    output_fp = opts.output_fp

    # have a swell day Yoshiki from the future 
    if chuck_norris_joke:
        import json

        o, e, _ = qiime_system_call('curl http://api.icndb.com/jokes/random')

        # The reply comes from the network: parse it as JSON data instead of
        # executing it as Python source.
        joke = json.loads(o.strip())
        print(joke['value']['joke'])
        exit(0)

    with open(coordinates_fp, 'U') as coords_f:
        coords_headers, coords_data, coords_eigenvalues, coords_percents =\
            parse_coords(coords_f)
    with open(mapping_file_fp, 'U') as mapping_f:
        mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)

    category_header_index = mapping_headers.index(category_header_name)
    category_names = list(set([line[category_header_index]
        for line in mapping_data]))

    main_figure = plt.figure()
    main_axes = main_figure.add_subplot(1, 1, 1, axisbg='black')
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    main_axes.tick_params(axis='y', colors='none')
    main_axes.tick_params(axis='x', colors='none')

    # sort the data!!! that way you can match make_3d_plots.py
    sorted_categories = natsort(category_names)
    # categories that are actually drawn, and their colours, kept in step so
    # that every legend label is paired with the right colour
    categories_used = []
    colors_used = []

    for index, category in enumerate(sorted_categories):
        # the first column of each mapping file row is used as the sample id
        sample_ids_list = [line[0] for line in mapping_data
                           if line[category_header_index] == category]

        # the colour depends on the position among *all* sorted categories,
        # so it is looked up before small groups are skipped
        qiime_color = get_qiime_hex_string_color(index)

        # groups with fewer than three samples are not drawn
        if len(sample_ids_list) < 3:
            continue

        categories_used.append(category)
        colors_used.append(qiime_color)

        indices = [coords_headers.index(sample_id) for sample_id in sample_ids_list]
        points = coords_data[indices, :2]

        hull = ConvexHull(points)
        main_axes.plot(points[:,0], points[:,1], 'o', color=qiime_color)
        for simplex in hull.simplices:
            main_axes.plot(points[simplex,0], points[simplex,1], 'w-')
        main_axes.plot(points[hull.vertices,0], points[hull.vertices,1], '--', lw=2, color=qiime_color)

    main_figure.savefig(output_fp)

    # the legend is written next to the figure, using the same extension
    name, extension = splitext(output_fp)
    extension = extension.replace('.', '')

    make_legend(categories_used, colors_used, 0, 0, 'black', 'white', name,
                extension, 80)
Example #30
0
def make_interactive_scatter(plot_label, dir_path, data_file_link,
                             background_color, label_color, sample_location,
                             alpha, xy_coords,
                             props, x_len=8, y_len=4, size=10,
                             draw_axes=False, generate_eps=True):
    """Draw a scatter plot, save it and build the HTML for its image map.

    xy_coords: a dict of form {series_label:([x data], [y data],
    [xy point label],[color])}
    props: dict that may provide "title", "xlabel" and "ylabel"

    The PNG (and, if generate_eps is set, a gzipped EPS) is written to
    dir_path; the returned HTML refers to the files through data_file_link.

    Returns a tuple (image source HTML, image map HTML, EPS download link);
    the link is an empty string when no EPS was generated.
    """
    my_axis = None

    # matplotlib defaults, applied in this order
    rc_settings = (
        ('font', {'size': '8'}),
        ('patch', {'linewidth': 0}),
        ('axes', {'linewidth': .5, 'edgecolor': label_color}),
        ('axes', {'labelsize': 8}),
        ('xtick', {'labelsize': 8, 'color': label_color}),
        ('ytick', {'labelsize': 8, 'color': label_color}),
    )
    for group, settings in rc_settings:
        rc(group, **settings)

    sc_plot = draw_scatterplot(props, xy_coords, x_len, y_len, size,
                               background_color, label_color, sample_location,
                               alpha)

    mtitle = props.get("title", "Groups")
    x_label = props.get("xlabel", "X")
    y_label = props.get("ylabel", "Y")

    # decorate the axes that draw_scatterplot left as the current ones
    ax = plt.gca()
    fig = ax.figure
    ax.set_title('%s' % mtitle, fontsize='10', color=label_color)
    ax.set_xlabel(x_label, fontsize='8', color=label_color)
    ax.set_ylabel(y_label, fontsize='8', color=label_color)

    if draw_axes:
        ax.axvline(linewidth=.5, x=0, color=label_color)
        ax.axhline(linewidth=.5, y=0, color=label_color)

    # my_axis is always None in this function, so no axis limits are forced
    if my_axis is not None:
        ax.axis(my_axis)

    # file names are derived from the first three characters of each label
    x_prefix = x_label[0:3]
    y_prefix = y_label[0:3]
    img_name = x_prefix + '_vs_' + y_prefix + '_plot.png'
    img_fp = os.path.join(dir_path, img_name)
    fig.savefig(img_fp, dpi=80, facecolor=background_color)

    # Create zipped eps files
    eps_link = ""
    if generate_eps:
        eps_img_name = str(x_prefix + 'vs' + y_prefix + 'plot.eps')
        eps_fp = os.path.join(dir_path, eps_img_name)
        fig.savefig(eps_fp, format='eps')
        out, err, retcode = qiime_system_call("gzip -f " + eps_fp)
        eps_href = os.path.join(data_file_link, eps_img_name) + ".gz"
        eps_link = DOWNLOAD_LINK % (eps_href, "Download Figure")

    all_cids, all_xcoords, all_ycoords = transform_xy_coords(
        xy_coords, sc_plot)

    xmap, img_height, img_width = generate_xmap(
        x_len, y_len, all_cids, all_xcoords, all_ycoords)

    points_id = plot_label + x_label[2:3] + y_label[2:3]

    img_src = IMG_MAP_SRC % (os.path.join(data_file_link, img_name),
                             points_id, img_width, img_height)
    map_src = MAP_SRC % (points_id, ''.join(xmap))
    return img_src, map_src, eps_link
def get_cluster_ratio(fasta_seqs, min_difference_in_clusters):
    """
    Uses uclust to calculate cluster ratio
    cluster_ratio =
    num_of_seq_in_cluster_with_max_seq
    divided by
    num_of_seq_in cluster_with_second_higest_seq
    Parameters
    ----------
    fasta_seqs: str
        FASTA-formatted sequences; the value is written verbatim to a
        temporary file that is handed to uclust
    min_difference_in_clusters: float
        percent identity threshold for cluster formation (passed to
        uclust as ``--id``)
    Returns
    ----------
    cluster_ratio: float
        cluster ratio of the sequences using uclust
        cluster_ratio =
        num_of_seq_in_cluster_with_max_seq /
        num_of_seq_in cluster_with_second_higest_seq
        1 is returned when fewer than two clusters are found.
    """
    temp_dir = get_qiime_temp_dir()
    fd_uc, uclust_tempfile_name = mkstemp(dir=temp_dir, suffix='.uc')
    close(fd_uc)
    fd_fas, fasta_tempfile_name = mkstemp(dir=temp_dir, suffix='.fasta')
    close(fd_fas)

    count_lookup = defaultdict(int)
    try:
        with open(fasta_tempfile_name, 'w') as fasta_tempfile:
            fasta_tempfile.write(fasta_seqs)

        command = "uclust --usersort --input {} --uc {} --id {}".format(
            fasta_tempfile_name, uclust_tempfile_name,
            min_difference_in_clusters)
        # In the function, I am calling uclust a large number of times.
        # Initially I was using from bfillings.get_clusters_from_fasta_filepath
        # but due to issue (biocore/bfillingss#31), I have temporarily
        # reverted to qiime_system_call.
        qiime_system_call(command)

        with open(uclust_tempfile_name, 'r') as uclust_tempfile:
            for line in uclust_tempfile:
                # only the lines starting with 'C' are used: their second
                # field is the key and their third field the count added
                if line.startswith('C'):
                    pieces = line.split('\t')
                    count_lookup[pieces[1]] += int(pieces[2])
    finally:
        # both temporary files are removed, even when something went wrong
        remove_files([uclust_tempfile_name, fasta_tempfile_name])

    sorted_counts = sorted(count_lookup.values(), reverse=True)
    if len(sorted_counts) < 2:
        return 1
    return float(sorted_counts[0]) / float(sorted_counts[1])
Example #32
0
def main():
    """Run Evident's unit tests and print a summary of the failures.

    Every ``test_*.py`` file below this script's directory (or matching
    ``opts.unittest_glob``, when given) is run in a child interpreter.

    Returns 0 if all of them passed, 1 otherwise.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.suppress_unit_tests:
        option_parser.error("Tests, suppresed. Nothing to run.")

    test_dir = abspath(dirname(__file__))

    # a passing unittest run ends its report with "OK"
    passed_re = re.compile('OK\s*$')
    missing_app_re = re.compile('ApplicationNotFoundError')
    python_name = 'python'
    bad_tests = []
    missing_application_tests = []

    def is_test_file(filename):
        return filename.startswith('test_') and filename.endswith('.py')

    # Run through all of Evident's unit tests, and keep track of any files which
    # fail unit tests.
    if not opts.suppress_unit_tests:
        if opts.unittest_glob:
            unittest_names = [abspath(fp) for fp in glob(opts.unittest_glob)
                              if is_test_file(split(fp)[1])]
        else:
            unittest_names = [join(root, name)
                              for root, dirs, files in walk(test_dir)
                              for name in files if is_test_file(name)]
        unittest_names.sort()

        for unittest_name in unittest_names:
            print("Testing %s:\n" % unittest_name)
            command = '%s %s -v' % (python_name, unittest_name)
            stdout, stderr, return_value = qiime_system_call(command)
            print(stderr)
            if unittest_good_result(passed_re, stderr) if False else passed_re.search(stderr):
                continue
            if missing_app_re.search(stderr):
                missing_application_tests.append(unittest_name)
            else:
                bad_tests.append(unittest_name)

    print("==============\nResult summary\n==============")

    if not opts.suppress_unit_tests:
        print("\nUnit test result summary\n------------------------\n")
        if bad_tests:
            print("\nFailed the following unit tests.\n%s" % '\n'.join(bad_tests))

        if missing_application_tests:
            print("\nFailed the following unit tests, in part or whole due "
                  "to missing external applications.\nDepending on the Evident features "
                  "you plan to use, this may not be critical.\n%s"
                  % '\n'.join(missing_application_tests))

        if not (missing_application_tests or bad_tests):
            print("\nAll unit tests passed.\n\n")

    # In case there were no failures of any type, exit with a return code of 0
    if bad_tests or missing_application_tests:
        return 1
    return 0
Example #33
0
# Version strings of optional dependencies, for reporting.
# For gdata, only record whether the import works, not which version it is.
try:
    import gdata
    gdata_installed = "Installed."
except ImportError:
    gdata_installed = "Not installed."

# A None result from get_pynast_version() is reported as "Not installed."
pynast_lib_version = get_pynast_version()
pynast_lib_version = ("Not installed." if pynast_lib_version is None
                      else pynast_lib_version)

# The sortmerna version is taken from the stderr of "sortmerna --version".
if which('sortmerna') is not None:
    _, serr, _ = qiime_system_call("sortmerna --version")
    sortmerna_lib_version = serr.strip()
else:
    sortmerna_lib_version = "Not installed."

# The sumaclust version is the first line of the "--help" output that
# starts with 'SUMACLUST Version' (after stripping whitespace).
if which('sumaclust') is not None:
    sout, _, _ = qiime_system_call("sumaclust --help")
    sout_lines = sout.split('\n')
    for e in sout_lines:
        e = e.strip()
        if e.startswith('SUMACLUST Version'):
            sumaclust_lib_version = e
            break
    else:
        # no line carried the version banner
        sumaclust_lib_version = "Installed, but can't identify version."
else:
    sumaclust_lib_version = "Not installed."

script_info = {}
Example #34
0
    def __call__(self,
                 input_fp,
                 output_dir,
                 params,
                 job_prefix=None,
                 poll_directly=False,
                 suppress_submit_jobs=False):
        """Split the input, write the jobs file, submit it and start polling.

        input_fp: input filepath (some callers pass a list of filepaths, in
         which case the first one is used to name the output files)
        output_dir: directory where the results are written; a working
         directory named after job_prefix is created inside it
        params: forwarded to the method-specific hooks
        job_prefix: prefix for the working directory and the jobs file; a
         random prefix is generated when None
        poll_directly: run the poller from the current process instead of
         appending it to the list of commands
        suppress_submit_jobs: write the jobs file but do not submit it

        No poller is set up, queued or run when self._suppress_polling is
        set.  If running the poller directly fails, an error is printed and
        the process exits with status -1.
        """
        ## Generate a list of files and directories that will need to be cleaned up
        self.files_to_remove = []
        
        # Perform any method-specific setup. This should prevent the need to 
        # overwrite __call__
        self._call_initialization(input_fp,
                                  output_dir,
                                  params,
                                  job_prefix,
                                  poll_directly,
                                  suppress_submit_jobs)
    
        # split the input filepath into directory and filename, base filename and
        # extension for use in naming other files
        try:
            input_dir, input_fn = split(input_fp)
            input_file_basename, input_ext = splitext(input_fn)
        except AttributeError:
            ## THIS IS AWFUL - SOME OF THE SCRIPTS PASS A LIST, SO THE
            ## PREVIOUS BLOCK WON'T WORK... WHAT DO WE WANT TO DO?
            input_dir, input_fn = split(input_fp[0])
            input_file_basename, input_ext = splitext(input_fn)
        
        # Allow the user to override the default job_prefix (defined by the 
        # base classes)
        if job_prefix is None:
            job_prefix = self._get_random_job_prefix(self._job_prefix)
        # A temporary output directory is created in output_dir named
        # job_prefix. Output files are then moved from the temporary
        # directory to the output directory when they are complete,
        # allowing a poller to detect when runs complete by the presence
        # of their output files.
        working_dir = join(output_dir,job_prefix)
        try:
            makedirs(working_dir)
            self.files_to_remove.append(working_dir)
        except OSError:
            # working dir already exists
            pass
        
        # Split the input file into the individual job input files. Add the
        # individual job files to the files_to_remove list
        input_fps, remove_input_on_completion = self._input_splitter(
                                         input_fp,
                                         params,
                                         self._jobs_to_start,
                                         job_prefix,
                                         working_dir)
        if remove_input_on_completion:
            self.files_to_remove += input_fps
        
        # Perform any method-specific setup (e.g., formatting a BLAST database)
        self._precommand_initiation(input_fp,output_dir,working_dir,params)
        
        # Generate the list of commands to be pushed out to workers 
        # and the list of output files generated by each job.
        commands, job_result_filepaths = self._get_job_commands(input_fps,
                                                                output_dir,
                                                                params,
                                                                job_prefix,
                                                                working_dir)
        self.files_to_remove += \
         self._identify_files_to_remove(job_result_filepaths,params)

        # Generate the output clean-up files
        merge_map_filepath, deletion_list_filepath, expected_files_filepath =\
         self._initialize_output_cleanup_files(job_result_filepaths,
                                               output_dir,
                                               working_dir,
                                               input_file_basename,
                                               params)

        # Set up poller apparatus if the user does not suppress polling
        polling_enabled = not self._suppress_polling
        if polling_enabled:
            poller_command = self._initiate_polling(job_result_filepaths,
                                                    working_dir,
                                                    poll_directly,
                                                    merge_map_filepath,
                                                    deletion_list_filepath,
                                                    expected_files_filepath)
        
        # If the poller should be run in the same way as the other commands
        # (rather than by the current process), add it to the list of commands.
        # There is no poller command at all when polling is suppressed.
        if polling_enabled and not poll_directly:
            commands.append(poller_command)
     
        # Build the filepath for the 'jobs script'. Add that file to the 
        # files_to_remove list.
        jobs_fp = join(working_dir,job_prefix + 'jobs.txt')
        self._write_jobs_file(commands,jobs_fp)
        self.files_to_remove.append(jobs_fp)
    
        # submit the jobs file using cluster_jobs, if not suppressed by the
        # user
        if not suppress_submit_jobs:
            stdout, stderr, return_value = self._submit_jobs(
             jobs_fp=jobs_fp, job_prefix=job_prefix)
        
        # If the poller is going to be run by the current process, 
        # start polling (unless polling is suppressed altogether)
        if polling_enabled and poll_directly:
            # IMPORTANT: the following line MUST use qiime_system_call()
            # instead of subprocess.call, .check_call, or .check_output in case
            # we are invoked in a child process with PIPEs (a deadlock will
            # occur otherwise). This can happen if this code is tested by
            # all_tests.py, for example.
            stdout, stderr, return_value = qiime_system_call(poller_command)
            if return_value != 0:
                print('**Error occuring when calling the poller directly. '
                      'Jobs may have been submitted, but are not being polled.')
                print(stderr)
                print(poller_command)
                exit(-1)
        self.files_to_remove = []

        # Perform any method-specific cleanup. This should prevent the need to 
        # overwrite __call__
        self._call_cleanup(input_fp,
                           output_dir,
                           params,
                           job_prefix,
                           poll_directly,
                           suppress_submit_jobs)
Example #35
0
# Version strings of optional dependencies, for reporting.
# h5py is reported together with the HDF5 version it exposes.
try:
    import h5py
    h5py_lib_version = '%s (HDF5 version: %s)' % (h5py.__version__,
                                                  h5py.version.hdf5_version)
except ImportError:
    h5py_lib_version = "Not installed."

# A None result from get_pynast_version() is reported as "Not installed."
pynast_lib_version = get_pynast_version()
pynast_lib_version = ("Not installed." if pynast_lib_version is None
                      else pynast_lib_version)

# The sortmerna version is taken from the stderr of "sortmerna --version".
if which('sortmerna') is not None:
    _, serr, _ = qiime_system_call("sortmerna --version")
    sortmerna_lib_version = serr.strip()
else:
    sortmerna_lib_version = "Not installed."

# The sumaclust version is the first line of the "--help" output that
# starts with 'SUMACLUST Version' (after stripping whitespace).
if which('sumaclust') is not None:
    sout, _, _ = qiime_system_call("sumaclust --help")
    sout_lines = sout.split('\n')
    for e in sout_lines:
        e = e.strip()
        if e.startswith('SUMACLUST Version'):
            sumaclust_lib_version = e
            break
    else:
        # no line carried the version banner
        sumaclust_lib_version = "Installed, but can't identify version."
else:
    sumaclust_lib_version = "Not installed."

if which('swarm') is None:
Example #36
0
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    if (opts.suppress_unit_tests and opts.suppress_script_usage_tests):
        option_parser.error(
            "You're suppressing both test types. Nothing to run.")

    test_dir = abspath(dirname(__file__))

    unittest_good_pattern = re.compile('OK\s*$')
    application_not_found_pattern = re.compile('ApplicationNotFoundError')
    python_name = 'python'
    bad_tests = []
    missing_application_tests = []

    # Run through all of QIIME's unit tests, and keep track of any files which
    # fail unit tests.
    if not opts.suppress_unit_tests:
        unittest_names = []
        if not opts.unittest_glob:
            for root, dirs, files in walk(test_dir):
                for name in files:
                    if name.startswith('test_') and name.endswith('.py'):
                        unittest_names.append(join(root, name))
        else:
            for fp in glob(opts.unittest_glob):
                fn = split(fp)[1]
                if fn.startswith('test_') and fn.endswith('.py'):
                    unittest_names.append(abspath(fp))

        unittest_names.sort()

        for unittest_name in unittest_names:
            print "Testing %s:\n" % unittest_name
            command = '%s %s -v' % (python_name, unittest_name)
            stdout, stderr, return_value = qiime_system_call(command)
            print stderr
            if not unittest_good_pattern.search(stderr):
                if application_not_found_pattern.search(stderr):
                    missing_application_tests.append(unittest_name)
                else:
                    bad_tests.append(unittest_name)

    qiime_test_data_dir = join(get_qiime_project_dir(), 'qiime_test_data')
    qiime_test_data_dir_exists = exists(qiime_test_data_dir)
    if not opts.suppress_script_usage_tests and qiime_test_data_dir_exists:
        if opts.script_usage_tests is not None:
            script_usage_tests = opts.script_usage_tests.split(',')
        else:
            script_usage_tests = None

        # Run the script usage testing functionality
        script_usage_result_summary, has_script_usage_example_failures = \
            run_script_usage_tests(
                test_data_dir=qiime_test_data_dir,
                scripts_dir=get_qiime_scripts_dir(),
                working_dir=qiime_config['temp_dir'],
                verbose=True,
                tests=script_usage_tests,
                force_overwrite=True,
                timeout=240)

    print "==============\nResult summary\n=============="

    if not opts.suppress_unit_tests:
        print "\nUnit test result summary\n------------------------\n"
        if bad_tests:
            print "\nFailed the following unit tests.\n%s" % '\n'.join(bad_tests)

        if missing_application_tests:
            print "\nFailed the following unit tests, in part or whole due " +\
                "to missing external applications.\nDepending on the QIIME features " +\
                "you plan to use, this may not be critical.\n%s"\
                % '\n'.join(missing_application_tests)

        if not (missing_application_tests or bad_tests):
            print "\nAll unit tests passed.\n\n"

    if not opts.suppress_script_usage_tests:
        if qiime_test_data_dir_exists:
            print "\nScript usage test result summary\n--------------------------------\n"
            print script_usage_result_summary
        else:
            print "\nCould not run script usage tests because the directory %s does not exist." % qiime_test_data_dir
        print ""

    # If script usage tests weren't suppressed, the qiime_test_data dir must
    # exist and we can't have any failures.
    script_usage_tests_success = (opts.suppress_script_usage_tests or
                                  (qiime_test_data_dir_exists and
                                   not has_script_usage_example_failures))

    # If any of the unit tests or script usage tests fail, or if we have any
    # missing application errors, use return code 1 (as python's unittest
    # module does to indicate one or more failures).
    return_code = 1
    if (len(bad_tests) == 0 and len(missing_application_tests) == 0 and
            script_usage_tests_success):
        return_code = 0
    return return_code
Example #37
0
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    unittest_glob = opts.unittest_glob
    temp_filepath = opts.temp_filepath
    script_usage_tests = opts.script_usage_tests
    suppress_unit_tests = opts.suppress_unit_tests
    suppress_script_usage_tests = opts.suppress_script_usage_tests

    # since the test data is in the tests folder just add scripts_test_data
    emperor_test_data_dir = join(abspath(dirname(__file__)),
        'scripts_test_data/')

    # offer the option for the user to pass the scripts dir from the command
    # line since there is no other way to get the scripts dir. If not provided
    # the base structure of the repository will be assumed. Note that for both
    # cases we are using absolute paths, to avoid unwanted failures.
    if opts.emperor_scripts_dir == None:
        emperor_scripts_dir = abspath(join(get_emperor_project_dir(),
            'scripts/'))
    else:
        emperor_scripts_dir = abspath(opts.emperor_scripts_dir)

    # make a sanity check
    if (suppress_unit_tests and suppress_script_usage_tests):
        option_parser.error("All tests have been suppresed. Nothing to run.")

    test_dir = abspath(dirname(__file__))

    unittest_good_pattern = re.compile('OK\s*$')
    application_not_found_pattern = re.compile('ApplicationNotFoundError')
    python_name = 'python'
    bad_tests = []
    missing_application_tests = []

    # Run through all of Emperor's unit tests, and keep track of any files which
    # fail unit tests, note that these are the unit tests only
    if not suppress_unit_tests:
        unittest_names = []
        if not unittest_glob:
            for root, dirs, files in walk(test_dir):
                for name in files:
                    if name.startswith('test_') and name.endswith('.py'):
                        unittest_names.append(join(root,name))
        else:
            for fp in glob(unittest_glob):
                fn = split(fp)[1]
                if fn.startswith('test_') and fn.endswith('.py'):
                    unittest_names.append(abspath(fp))

        unittest_names.sort()

        for unittest_name in unittest_names:
            print "Testing %s:\n" % unittest_name
            command = '%s %s -v' % (python_name, unittest_name)
            stdout, stderr, return_value = qiime_system_call(command)
            print stderr
            if not unittest_good_pattern.search(stderr):
                if application_not_found_pattern.search(stderr):
                    missing_application_tests.append(unittest_name)
                else:
                    bad_tests.append(unittest_name)

    script_usage_failures = 0

    # choose to run some of the script usage tests or all the available ones
    if not suppress_script_usage_tests and exists(emperor_test_data_dir) and\
        exists(emperor_scripts_dir):
        if script_usage_tests != None:
            script_tests = script_usage_tests.split(',')
        else:
            script_tests = None
        # Run the script usage testing functionality
        script_usage_result_summary, script_usage_failures = \
            run_script_usage_tests(qiime_test_data_dir=emperor_test_data_dir,
            qiime_scripts_dir=emperor_scripts_dir,
            working_dir=temp_filepath, verbose=True,
            tests=script_tests, failure_log_fp=None, force_overwrite=False)

    print "==============\nResult summary\n=============="

    if not suppress_unit_tests:
        print "\nUnit test result summary\n------------------------\n"
        if bad_tests:
            print "\nFailed the following unit tests.\n%s" %'\n'.join(bad_tests)
    
        if missing_application_tests:
            print "\nFailed the following unit tests, in part or whole due "+\
                "to missing external applications.\nDepending on the Emperor "+\
                "features you plan to use, this may not be critical.\n%s"\
                % '\n'.join(missing_application_tests)
        
        if not(missing_application_tests or bad_tests):
            print "\nAll unit tests passed.\n\n"

    if not suppress_script_usage_tests:
        if exists(emperor_test_data_dir) and exists(emperor_scripts_dir):
            print "\nScript usage test result summary"+\
                "\n------------------------------------\n"
            print script_usage_result_summary
        else:
            print ("\nCould not run script usage tests.\nThe Emperor scripts "
                "directory could not be automatically located, try supplying "
                " it manually using the --emperor_scripts_dir option.")

    # In case there were no failures of any type, exit with a return code of 0
    return_code = 1
    if (len(bad_tests) == 0 and len(missing_application_tests) == 0 and
        script_usage_failures == 0):
        return_code = 0

    return return_code
Example #38
0
File: test.py Project: jb2263/qiime
def run_script_usage_tests(qiime_test_data_dir,
                           qiime_scripts_dir,
                           working_dir,
                           verbose=False,
                           tests=None,
                           failure_log_fp=None,
                           force_overwrite=False):
    """ Test script_usage examples when test data is present in qiime_test_data_dir
    
        These tests are currently used with the qiime_test_data repository, which can
        be found at https://github.com/qiime-dev/qiime_test_data

        Returns a result summary string and the number of script usage
        examples (i.e. commands) that failed.
    """
    # process input filepaths and directories
    qiime_test_data_dir = abspath(qiime_test_data_dir)
    working_dir = join(working_dir, 'script_usage_tests')
    if force_overwrite and exists(working_dir):
        rmtree(working_dir)
    if failure_log_fp != None:
        failure_log_fp = abspath(failure_log_fp)

    if tests == None:
        tests = [
            split(d)[1] for d in glob('%s/*' % qiime_test_data_dir) if isdir(d)
        ]

    if verbose:
        print 'Tests to run:\n %s' % ' '.join(tests)

    addsitedir(qiime_scripts_dir)

    failed_tests = []
    warnings = []
    total_tests = 0
    for test in tests:

        # import the usage examples - this is possible because we added
        # qiime_scripts_dir to the PYTHONPATH above
        script_fn = '%s/%s.py' % (qiime_scripts_dir, test)
        script = __import__(test)
        usage_examples = script.script_info['script_usage']

        if verbose:
            print 'Testing %d usage examples from: %s' % (len(usage_examples),
                                                          script_fn)

        # init the test environment
        test_input_dir = '%s/%s' % (qiime_test_data_dir, test)
        test_working_dir = '%s/%s' % (working_dir, test)
        copytree(test_input_dir, test_working_dir)
        chdir(test_working_dir)

        # remove pre-exisitng output files if any
        try:
            script_usage_output_to_remove = script.script_info[
                'script_usage_output_to_remove']
        except KeyError:
            script_usage_output_to_remove = []
        for e in script_usage_output_to_remove:
            rmtree(e.replace('$PWD', getcwd()), ignore_errors=True)
            remove_files([e.replace('$PWD', getcwd())], error_on_missing=False)

        if verbose:
            print ' Running tests in: %s' % getcwd()
            print ' Tests:'

        for usage_example in usage_examples:
            if '%prog' not in usage_example[2]:
                warnings.append(
                    '%s usage examples do not all use %%prog to represent the command name. You may not be running the version of the command that you think you are!'
                    % test)
            cmd = usage_example[2].replace('%prog', script_fn)
            if verbose:
                print '  %s' % cmd
            stdout, stderr, return_value = qiime_system_call(cmd)
            total_tests += 1
            if return_value != 0:
                failed_tests.append((cmd, stdout, stderr, return_value))

        if verbose:
            print ''

    if failure_log_fp:
        failure_log_f = open(failure_log_fp, 'w')
        if len(failed_tests) == 0:
            failure_log_f.write('All script interface tests passed.\n')
        else:
            i = 0
            for cmd, stdout, stderr, return_value in failed_tests:
                failure_log_f.write(
                    '**Failed test %d:\n%s\n\nReturn value: %d\n\nStdout:\n%s\n\nStderr:\n%s\n\n'
                    % (i, cmd, return_value, stdout, stderr))
        failure_log_f.close()

    if warnings:
        print 'Warnings:'
        for warning in warnings:
            print ' ' + warning
        print ''

    result_summary = 'Ran %d commands to test %d scripts. %d of these commands failed.' % (
        total_tests, len(tests), len(failed_tests))
    if len(failed_tests) > 0:
        failed_scripts = set([split(e[0].split()[0])[1] for e in failed_tests])
        result_summary += '\nFailed scripts were: %s' % " ".join(
            failed_scripts)
    if failure_log_fp:
        result_summary += "\nFailures are summarized in %s" % failure_log_fp

    rmtree(working_dir)

    return result_summary, len(failed_tests)
Example #39
0
# Gather version descriptions for optional dependencies. Every
# *_lib_version name ends up as either a version description or the
# "Not installed." placeholder.
try:
    import h5py
    h5py_lib_version = ''.join(
        (h5py.__version__,
         ' (HDF5 version: %s)' % h5py.version.hdf5_version))
except ImportError:
    h5py_lib_version = "Not installed."


pynast_lib_version = get_pynast_version()
pynast_lib_version = ("Not installed." if pynast_lib_version is None
                      else pynast_lib_version)

if which('sortmerna') is not None:
    # Only the second element of the call result (stderr) is used here.
    _, serr, _ = qiime_system_call("sortmerna --version")
    sortmerna_lib_version = serr.strip()
else:
    sortmerna_lib_version = "Not installed."

if which('sumaclust') is not None:
    sout, _, _ = qiime_system_call("sumaclust --help")
    sout_lines = sout.split('\n')
    # Take the first stripped help line that starts with the version banner.
    for e in sout_lines:
        e = e.strip()
        if e.startswith('SUMACLUST Version'):
            sumaclust_lib_version = e
            break
    else:
        # Reached only when no line matched.
        sumaclust_lib_version = "Installed, but can't identify version."
else:
    sumaclust_lib_version = "Not installed."

if which('swarm') is None:
Example #40
0
File: util.py Project: wilkox/qiime
    def __call__(self,
                 input_fp,
                 output_dir,
                 params,
                 job_prefix=None,
                 poll_directly=False,
                 suppress_submit_jobs=False):
        """ Split the input, build the job commands and submit/poll them

            input_fp: path to the input file; a list of paths is also
             accepted, in which case the first entry is used to derive the
             base filename
            output_dir: directory where results are written; the working
             directory is created inside it
            params: parameters forwarded to the method-specific hooks
            job_prefix: prefix for the working directory and job files; a
             random prefix is generated when None
            poll_directly: if True, run the poller from the current process
             rather than adding it to the list of job commands
            suppress_submit_jobs: if True, write the jobs file but do not
             submit it

            When self._suppress_polling is true no poller command is
            created, added to the jobs or run.

            Exits the process with status -1 if the poller, run directly,
            returns a non-zero status.
        """
        # Generate a list of files and directories that will be cleaned up
        self.files_to_remove = []

        # Allow the user to override the default job_prefix (defined by the
        # base classes)
        if job_prefix is None:
            job_prefix = self._get_random_job_prefix(self._job_prefix)

        # A temporary output directory is created in output_dir named
        # job_prefix. Output files are then moved from the temporary
        # directory to the output directory when they are complete,
        # allowing a poller to detect when runs complete by the presence
        # of their output files.
        working_dir = join(output_dir, job_prefix)
        try:
            makedirs(working_dir)
            self.files_to_remove.append(working_dir)
        except OSError:
            # working dir already exists
            pass

        # Perform any method-specific setup. This should prevent the need to
        # overwrite __call__
        self._call_initialization(input_fp,
                                  output_dir,
                                  params,
                                  job_prefix,
                                  poll_directly,
                                  suppress_submit_jobs)

        # split the input filepath into directory and filename, base filename and
        # extension for use in naming other files
        try:
            input_dir, input_fn = split(input_fp)
            input_file_basename, input_ext = splitext(input_fn)
        except AttributeError:
            # THIS IS AWFUL - SOME OF THE SCRIPTS PASS A LIST, SO THE
            # PREVIOUS BLOCK WON'T WORK... WHAT DO WE WANT TO DO?
            input_dir, input_fn = split(input_fp[0])
            input_file_basename, input_ext = splitext(input_fn)

        # Split the input file into the individual job input files. Add the
        # individual job files to the files_to_remove list
        input_fps, remove_input_on_completion = self._input_splitter(
            input_fp,
            params,
            self._jobs_to_start,
            job_prefix,
            working_dir)
        if remove_input_on_completion:
            self.files_to_remove += input_fps

        # Perform any method-specific setup (e.g., formatting a BLAST database)
        self._precommand_initiation(input_fp, output_dir, working_dir, params)

        # Generate the list of commands to be pushed out to workers
        # and the list of output files generated by each job.
        commands, job_result_filepaths = self._get_job_commands(input_fps,
                                                                output_dir,
                                                                params,
                                                                job_prefix,
                                                                working_dir)
        self.files_to_remove += \
            self._identify_files_to_remove(job_result_filepaths, params)

        # Generate the output clean-up files
        merge_map_filepath, deletion_list_filepath, expected_files_filepath =\
            self._initialize_output_cleanup_files(job_result_filepaths,
                                                  output_dir,
                                                  working_dir,
                                                  input_file_basename,
                                                  params)

        # Set up poller apparatus if the user does not suppress polling.
        # poller_command only exists when polling is enabled, so every later
        # use is guarded by polling_enabled; without the guard, suppressing
        # polling raised UnboundLocalError below.
        polling_enabled = not self._suppress_polling
        if polling_enabled:
            poller_command = self._initiate_polling(job_result_filepaths,
                                                    working_dir,
                                                    poll_directly,
                                                    merge_map_filepath,
                                                    deletion_list_filepath,
                                                    expected_files_filepath)

        # If the poller should be run in the same way as the other commands
        # (rather than by the current process), add it to the list of commands
        if polling_enabled and not poll_directly:
            commands.append(poller_command)

        # Build the filepath for the 'jobs script'. Add that file to the
        # files_to_remove list.
        jobs_fp = join(working_dir, job_prefix + 'jobs.txt')
        self._write_jobs_file(commands, jobs_fp)
        self.files_to_remove.append(jobs_fp)

        # submit the jobs file using cluster_jobs, if not suppressed by the
        # user
        if not suppress_submit_jobs:
            stdout, stderr, return_value = self._submit_jobs(
                jobs_fp=jobs_fp, job_prefix=job_prefix)

        # If the poller is going to be run by the current process,
        # start polling
        if polling_enabled and poll_directly:
            # IMPORTANT: the following line MUST use qiime_system_call()
            # instead of subprocess.call, .check_call, or .check_output in case
            # we are invoked in a child process with PIPEs (a deadlock will
            # occur otherwise). This can happen if this code is tested by
            # all_tests.py, for example.
            stdout, stderr, return_value = qiime_system_call(poller_command)
            if return_value != 0:
                print '**Error occuring when calling the poller directly. ' +\
                    'Jobs may have been submitted, but are not being polled.'
                print stderr
                print poller_command
                exit(-1)
        # The removal list is reset here rather than acted upon.
        # NOTE(review): presumably clean-up is delegated to the poller --
        # confirm.
        self.files_to_remove = []

        # Perform any method-specific cleanup. This should prevent the need to
        # overwrite __call__
        self._call_cleanup(input_fp,
                           output_dir,
                           params,
                           job_prefix,
                           poll_directly,
                           suppress_submit_jobs)
def submit_illumina_and_split_lib(data_access,fastq_files,metadata_study_id,
                                  input_dir):
    """
        Illumina Loading: This function takes the fasta filenames and using 
        that path, determines the location of the split-library and picked-otu
        files.  Once file locations have been determined, it moves the files to
        the DB machine and load the files into the DB.
    """
    
    # get DB connection and cursor
    con = data_access.getSFFDatabaseConnection()
    cur = con.cursor()
    
    ### this may help in speeding up loading but shouldn't be necessary
    #print 'Rebuilding PK_SPLIT_LIBRARY_READ_MAP...'
    #cur.execute('alter index "SFF"."PK_SPLIT_LIBRARY_READ_MAP" rebuild ')
    #cur = con.cursor()
    
    # check if study exists
    study_id_exists=data_access.checkIfStudyIdExists(metadata_study_id)
    print "Study ID exists: " + str(study_id_exists)
    
    # get temp filename
    alphabet = "ABCDEFGHIJKLMNOPQRSTUZWXYZ"
    alphabet += alphabet.lower()
    alphabet += "01234567890"
    random_fname=''.join([choice(alphabet) for i in range(10)])
    tmp_filename ='_'+random_fname+'_'+strftime("%Y_%m_%d_%H_%M_%S")
    
    # get fastq filenames
    fastq_filenames=fastq_files.split(',')
    seq_run_id=0  
    analysis_id=0    
    split_lib_input_checksums=[]

    ### by disabling constraints you can speed up loading as well, but shouldn't
    ### be necessary
    #valid = data_access.disableTableConstraints()
    #print "Disabled table constraints"

    #split the fastq filenames and determine filepaths
    for fastq_fname in fastq_filenames:
        input_fname, input_ext = splitext(split(fastq_fname)[-1])
        input_basename, input_ext = splitext(fastq_fname)
        
        # get analysis notes
        analysis_notes=split(input_basename)[0]
        
        # get md5 for raw fastq files
        fastq_md5 = safe_md5(open(fastq_fname)).hexdigest()
        print 'MD5 is: %s' % str(fastq_md5)

        # create an analysis row in analysis table
        if analysis_id==0:
            analysis_id=data_access.createAnalysis(metadata_study_id)
        
        # check if fastq info already loaded
        fastq_exists=data_access.checkIfSFFExists(fastq_md5)
        print 'fastq in database? %s' % str(fastq_exists)
        
        # if fastq info not loaded, then insert into DB
        if not fastq_exists:
            if seq_run_id==0:
                seq_run_id=data_access.createSequencingRun(True,'ILLUMINA',
                                                           None,seq_run_id)
            
            # get sequence count
            if fastq_fname.endswith('.gz'):
                count_seqs_cmd = "zcat %s | grep ^@ | wc -l" % (fastq_fname)
            else:
                count_seqs_cmd="grep ^@ %s | wc -l" % (fastq_fname)
            o,e,r = qiime_system_call(count_seqs_cmd)
            seq_counts = o.strip()
            
            # get header length and # of flows (length of seq)
            fastq_fname_open=open(fastq_fname)
            first_seq_fastq=get_top_fastq_two_lines(fastq_fname_open)
            header_length=len(first_seq_fastq[1])
            num_flows=len(first_seq_fastq[1])
            
            # insert fastq info
            valid=data_access.addSFFFileInfo(True,input_fname,
                                          seq_counts,
                                          header_length,
                                          None,
                                          num_flows,
                                          None,
                                          None,
                                          None,
                                          fastq_md5,seq_run_id)
        else:
            seq_run_id=data_access.getSeqRunIDUsingMD5(fastq_md5)
    
    print 'sequence_run_id is: %s' % str(seq_run_id)
    
    # get md5 for split-library input
    split_lib_input_md5sum=safe_md5(MD5Wrap(fastq_filenames)).hexdigest()
    print split_lib_input_md5sum
    print 'Finished loading the processed ILLUMINA data!'
    print 'Run ID: %s' % seq_run_id
    print 'Analysis ID: %s' % analysis_id
    
    # update analysis table with seq_run_id
    valid=data_access.updateAnalysisWithSeqRunID(True,analysis_id,seq_run_id)
    if not valid:
        raise ValueError, 'Error: Unable to append SEQ_RUN_ID into ANALYSIS table!'

    return analysis_id,input_dir,seq_run_id,split_lib_input_md5sum
Example #42
0
# Work out a version description for each optional dependency, falling back
# to "Not installed." when the dependency cannot be found.
try:
    import h5py
    h5py_lib_version = ''.join(
        (h5py.__version__,
         ' (HDF5 version: %s)' % h5py.version.hdf5_version))
except ImportError:
    h5py_lib_version = "Not installed."

pynast_lib_version = get_pynast_version()
pynast_lib_version = ("Not installed." if pynast_lib_version is None
                      else pynast_lib_version)

if which('sortmerna') is not None:
    # The version text comes from the second element of the result (stderr).
    _, serr, _ = qiime_system_call("sortmerna --version")
    sortmerna_lib_version = serr.strip()
else:
    sortmerna_lib_version = "Not installed."

if which('sumaclust') is not None:
    sout, _, _ = qiime_system_call("sumaclust --help")
    sout_lines = sout.split('\n')
    # The first stripped help line starting with the banner is the version.
    for e in sout_lines:
        e = e.strip()
        if e.startswith('SUMACLUST Version'):
            sumaclust_lib_version = e
            break
    else:
        # The help output contained no version banner.
        sumaclust_lib_version = "Installed, but can't identify version."
else:
    sumaclust_lib_version = "Not installed."

# Starts out empty at this point.
script_info = {}
Example #43
0
def make_line_plot(dir_path,
                   data_file_link,
                   background_color,
                   label_color,
                   xy_coords,
                   props,
                   x_len=8,
                   y_len=4,
                   draw_axes=False,
                   generate_eps=True):
    """ Write a line plot to dir_path as scree_plot.png (and optionally eps)

    dir_path: directory the image files are written to
    data_file_link: path prefix used to build the returned links
    background_color: face color of the saved PNG
    label_color: color of the title, axis labels and axes edges
    xy_coords: a dict of form {series_label:([x data], [y data], point_marker, color)}
    props: dict optionally providing "title", "xlabel" and "ylabel"
    x_len, y_len: figure size
    draw_axes: not used by this function
    generate_eps: if True, also save scree_plot.eps and gzip it

    Returns (link to the PNG, download link for the gzipped EPS); the
    second item is an empty string when generate_eps is False.

    (code adapted from Micah Hamady's code)
    """
    # Global matplotlib styling shared by every element of the figure.
    rc('font', size='8')
    rc('axes', linewidth=.5, edgecolor=label_color)
    for rc_group in ('axes', 'xtick', 'ytick'):
        rc(rc_group, labelsize=8)
    figure(figsize=(x_len, y_len))

    # Title and axis labels fall back to generic defaults.
    title('%s' % props.get("title", "Groups"), fontsize='10',
          color=label_color)
    xlabel(props.get("xlabel", "X"), fontsize='8', color=label_color)
    ylabel(props.get("ylabel", "Y"), fontsize='8', color=label_color)

    # Draw the series in sorted label order so the legend order is stable.
    for series_label in sorted(xy_coords.keys()):
        series = xy_coords[series_label]
        plot(series[0],
             series[1],
             c=series[3],
             marker=series[2],
             label=series_label,
             linewidth=.1,
             ms=5,
             alpha=1.0)

    legend_font = FontProperties()
    legend_font.set_size('8')
    legend(prop=legend_font, loc=0)

    show()

    png_name = 'scree_plot.png'
    savefig(os.path.join(dir_path, png_name),
            dpi=80,
            facecolor=background_color)
    png_link = os.path.join(data_file_link, png_name)

    if not generate_eps:
        return png_link, ""

    # Save an EPS copy and compress it in place; the gzip exit status is not
    # inspected.
    eps_name = str('scree_plot.eps')
    eps_path = os.path.join(dir_path, eps_name)
    savefig(eps_path, format='eps')
    qiime_system_call("gzip -f " + eps_path)
    eps_link = DOWNLOAD_LINK % (
        (os.path.join(data_file_link, eps_name) + ".gz"),
        "Download Figure")

    return png_link, eps_link
Example #44
0
def create_personal_results(mapping_fp, 
                            distance_matrix_fp, 
                            collated_dir_fp, 
                            output_fp, prefs_fp, 
                            personal_id_field,
                            otu_table,
                            parameter_fp, 
                            personal_ids=None, 
                            column_title='Self', 
                            individual_titles=None,
                            category_to_split='BodySite',
                            time_series_category='WeeksSinceStart',
                            suppress_alpha_rarefaction=False,
                            verbose=False):
    map_as_list, header, comments = parse_mapping_file(open(mapping_fp, 'U'))
    try:
        personal_id_index = header.index(personal_id_field)
    except ValueError:
        raise ValueError("personal id field (%s) is not a mapping file column header" %
                         personal_id_field)
    header.append(column_title)
    
    if personal_ids == None: 
        personal_ids  = get_personal_ids(map_as_list, personal_id_index)
    else:
        for id in personal_ids.split(','):
            if id not in get_personal_ids(map_as_list, personal_id_index):
                raise ValueError('%s is not an id in the mapping file.' %id)
        personal_ids = personal_ids.split(',')
        
    output_directories = []
    makedirs(output_fp)
    otu_table_title = otu_table.split('/')[-1].split('.')
    for person_of_interest in personal_ids:
        makedirs(join(output_fp, person_of_interest))
        pcoa_dir = join(output_fp, person_of_interest, "beta_diversity")
        rarefaction_dir = join(output_fp, person_of_interest, "alpha_rarefaction")
        area_plots_dir = join(output_fp, person_of_interest, "time_series")
        output_directories.append(pcoa_dir)
        output_directories.append(rarefaction_dir)
        output_directories.append(area_plots_dir)
        personal_mapping_file_fp = join(output_fp, person_of_interest, "mapping_file.txt")
        html_fp = join(output_fp, person_of_interest, "index.html")
        personal_map = create_personal_mapping_file(map_as_list,
                                     header,
                                     comments,
                                     person_of_interest,
                                     personal_mapping_file_fp, 
                                     personal_id_index, 
                                     individual_titles)
        create_index_html(person_of_interest, html_fp)
        column_title_index = header.index(column_title)
        column_title_values = set([e[column_title_index] for e in personal_map])
        cat_index = header.index(category_to_split)
        cat_values = set([e[cat_index] for e in personal_map])
        
        ## Alpha rarefaction steps
        if not suppress_alpha_rarefaction:
            cmd = "make_rarefaction_plots.py -i %s -m %s -p %s -o %s" % (collated_dir_fp, 
                                                                         personal_mapping_file_fp,
                                                                         prefs_fp, 
                                                                         rarefaction_dir)
            if verbose:
                print cmd          
            stdout, stderr, return_code = qiime_system_call(cmd)
            if return_code != 0:
                raise ValueError("Command failed!\nCommand: %s\n Stdout: %s\n Stderr: %s\n" %\
                (cmd, stdout, stderr))
        
        ## Beta diversity steps
        cmd = "make_3d_plots.py -m %s -p %s -i %s -o %s" % (personal_mapping_file_fp, 
                                                            prefs_fp, 
                                                            distance_matrix_fp, 
                                                            pcoa_dir)
        if verbose:
            print cmd
        stdout, stderr, return_code = qiime_system_call(cmd)
        if return_code != 0:
            raise ValueError("Command failed!\nCommand: %s\n Stdout: %s\n Stderr: %s\n" %\
            (cmd, stdout, stderr))
        
        ## Split OTU table into self/other per-body-site tables
        cmd = "split_otu_table.py -i %s -m %s -f %s -o %s" % (otu_table,
                                                              personal_mapping_file_fp,
                                                              column_title, 
                                                              area_plots_dir)
        if verbose:
            print cmd
        stdout, stderr, return_code = qiime_system_call(cmd)
        if return_code != 0:
            raise ValueError("Command failed!\nCommand: %s\n Stdout: %s\n Stderr: %s\n" %\
            (cmd, stdout, stderr))
            
        
        for column_title_value in column_title_values:
            print column_title_value
            biom_fp = join(area_plots_dir, '%s_%s.%s' % (otu_table_title[0], column_title_value, otu_table_title[1]))
            body_site_dir = join(area_plots_dir, column_title_value)
            cmd = "split_otu_table.py -i %s -m %s -f %s -o %s" % (biom_fp,
                                                                  personal_mapping_file_fp,
                                                                  category_to_split, 
                                                                  body_site_dir)
            if verbose:
                print cmd
            stdout, stderr, return_code = qiime_system_call(cmd)
            if return_code != 0:
                raise ValueError("Command failed!\nCommand: %s\n Stdout: %s\n Stderr: %s\n" %\
                (cmd, stdout, stderr))
                
            for cat_value in cat_values:
                otu_table_fp = join(body_site_dir, "otu_table_%s_%s.biom" % (column_title_value, cat_value))
                print otu_table_fp
                if exists(otu_table_fp):
                    # Not supporting parameter files yet
                    #if parameter_fp == None:
                    #    parameter_fp = ''
                    #else:
                    #    parameter_fp = '-p %s' %parameter_fp
                    plots = join(area_plots_dir, "taxa_plots_%s_%s" % (column_title_value, cat_value))
                    cmd = "summarize_taxa_through_plots.py -i %s -o %s -c %s -m %s -s" % (otu_table_fp,
                                                                                                plots,
                                                                                                time_series_category, 
                                                                                                personal_mapping_file_fp)
                                                                                                #parameter_fp)
                    if verbose:
                        print cmd
                    stdout, stderr, return_code = qiime_system_call(cmd)
                    if return_code != 0:
                        raise ValueError("Command failed!\nCommand: %s\n Stdout: %s\n Stderr: %s\n" %\
                        (cmd, stdout, stderr))
    return output_directories
Example #45
0
def main():
    """Run the QIIME unit tests, basic script tests and script usage tests.

    Each of the three suites can be switched off through the parsed
    command-line options (``suppress_unit_tests``, ``suppress_script_tests``
    and ``suppress_script_usage_tests``); suppressing all three is reported
    as an option error. ``unittest_glob`` optionally restricts which unit
    test files are run. A result summary for every suite that was not
    suppressed is printed to stdout.

    Returns:
        0 when no unit test failed, no unit test hit a missing external
        application, no script failed its ``-h`` check, no script usage
        example failed and (unless script usage tests were suppressed)
        ``qiime_test_data_dir`` is configured; 1 otherwise.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if (opts.suppress_unit_tests and
            opts.suppress_script_tests and
            opts.suppress_script_usage_tests):
        option_parser.error(
            "You're suppressing all three test types. Nothing to run.")

    test_dir = abspath(dirname(__file__))

    # unittest writes its report to stderr and ends it with "OK" on success.
    unittest_good_pattern = re.compile(r'OK\s*$')
    application_not_found_pattern = re.compile('ApplicationNotFoundError')
    python_name = 'python'
    bad_tests = []
    missing_application_tests = []

    # Run through all of QIIME's unit tests, and keep track of any files which
    # fail unit tests.
    if not opts.suppress_unit_tests:
        unittest_names = []
        if not opts.unittest_glob:
            # No glob given: collect every test_*.py below this directory.
            for root, dirs, files in walk(test_dir):
                for name in files:
                    if name.startswith('test_') and name.endswith('.py'):
                        unittest_names.append(join(root, name))
        else:
            for fp in glob(opts.unittest_glob):
                fn = split(fp)[1]
                if fn.startswith('test_') and fn.endswith('.py'):
                    unittest_names.append(abspath(fp))

        unittest_names.sort()

        for unittest_name in unittest_names:
            print("Testing %s:\n" % unittest_name)
            command = '%s %s -v' % (python_name, unittest_name)
            stdout, stderr, return_value = qiime_system_call(command)
            print(stderr)
            if unittest_good_pattern.search(stderr):
                continue
            # Separate failures caused by a missing external application
            # from genuine test failures so they can be reported differently.
            if application_not_found_pattern.search(stderr):
                missing_application_tests.append(unittest_name)
            else:
                bad_tests.append(unittest_name)

    bad_scripts = []
    script_directory_found = False
    if not opts.suppress_script_tests:
        # Run through all of QIIME's scripts, and pass -h to each one. If the
        # resulting stdout does not begin with the Usage text, that is an
        # indicator of something being wrong with the script. Issues that would
        # cause that are bad import statements in the script, SyntaxErrors, or
        # other failures prior to running
        # qiime.util.parse_command_line_parameters.
        try:
            scripts_dir = get_qiime_scripts_dir()
            script_directory_found = True
        except AssertionError:
            script_directory_found = False

        if script_directory_found:
            script_names = glob('%s/*py' % scripts_dir)
            script_names.sort()

            for script_name in script_names:
                # Escape the file name so that regex metacharacters in it
                # (notably the '.' before the extension) match literally.
                script_good_pattern = re.compile(
                    '^Usage: %s' % re.escape(split(script_name)[1]))
                print("Testing %s." % script_name)
                command = '%s %s -h' % (python_name, script_name)
                stdout, stderr, return_value = qiime_system_call(command)
                if not script_good_pattern.search(stdout):
                    bad_scripts.append(script_name)

    num_script_usage_example_failures = 0
    qiime_test_data_dir = qiime_config['qiime_test_data_dir']
    # Use the same truthiness check here and in the summary below, so an
    # unset *or* empty qiime_test_data_dir never runs the tests and is always
    # reported as "could not run".
    if not opts.suppress_script_usage_tests and qiime_test_data_dir:
        # Run the script usage testing functionality
        script_usage_result_summary, num_script_usage_example_failures = \
            run_script_usage_tests(
                qiime_test_data_dir=qiime_test_data_dir,
                qiime_scripts_dir=qiime_config['qiime_scripts_dir'],
                working_dir=qiime_config['temp_dir'],
                verbose=True,
                tests=None,  # runs all
                failure_log_fp=None,
                force_overwrite=True)

    print("==============\nResult summary\n==============")

    if not opts.suppress_unit_tests:
        print("\nUnit test result summary\n------------------------\n")
        if bad_tests:
            print("\nFailed the following unit tests.\n%s" %
                  '\n'.join(bad_tests))

        if missing_application_tests:
            print(("\nFailed the following unit tests, in part or whole due "
                   "to missing external applications.\nDepending on the "
                   "QIIME features you plan to use, this may not be "
                   "critical.\n%s") % '\n'.join(missing_application_tests))

        if not (missing_application_tests or bad_tests):
            print("\nAll unit tests passed.\n\n")

    if not opts.suppress_script_tests:
        print("\nBasic script test result summary\n"
              "--------------------------------\n")
        if not script_directory_found:
            print("Critical error: Failed to test scripts because the script "
                  "directory could not be found.\n The most likely "
                  "explanation for this failure is that you've installed "
                  "QIIME using setup.py, and forgot to specify the "
                  "qiime_scripts_dir in your qiime_config file. This value "
                  "shoud be set either to the directory you provided for "
                  "--install-scripts, or /usr/local/bin if no value was "
                  "provided to --install-scripts.")
        elif bad_scripts:
            print("Failed the following basic script tests.\n%s" %
                  '\n'.join(bad_scripts))
        else:
            print("All basic script tests passed successfully.\n")

    qiime_test_data_dir_exists = True
    if not opts.suppress_script_usage_tests:
        if qiime_test_data_dir:
            print("\nScript usage test result summary\n"
                  "------------------------------------\n")
            print(script_usage_result_summary)
        else:
            print("\nCould not run script usage tests because "
                  "qiime_test_data_dir is not defined in your qiime_config.")
            qiime_test_data_dir_exists = False
        print("")

    # If any of the unit tests, script tests, or script usage tests fail, or if
    # we have any missing application errors or a missing QIIME test data dir
    # if script usage tests weren't suppressed, use return code 1 (as python's
    # unittest module does to indicate one or more failures).
    everything_passed = (not bad_tests
                         and not missing_application_tests
                         and not bad_scripts
                         and num_script_usage_example_failures == 0
                         and qiime_test_data_dir_exists)
    return 0 if everything_passed else 1