Exemple #1
0
    def __init__(self,
                 input_fasta,
                 target,
                 output=None,
                 binary="blastn",
                 makeblastdb="makeblastdb",
                 log="/dev/null"):
        """Prepare a BLAST search of ``input_fasta`` against ``target``.

        Args:
            input_fasta: value substituted for ``-query`` in the search command.
            target: value substituted for ``-db`` in the search command and
                for ``-in`` in the makeblastdb command.
            output: value substituted for ``-out``; when falsy, a name obtained
                from ``get_temporary_file_name()`` is used instead.
            binary: search executable placed at the start of the search
                command.
            makeblastdb: executable placed at the start of the database
                building command.
            log: file both command templates append stdout and stderr to.
        """
        # Executables and the log their output is redirected to.
        self.binary = binary
        self.makeblastdb = makeblastdb
        self.log = log

        # Query, database and (possibly still unset) result locations.
        self.input = input_fasta
        self.target = target
        self.output = output

        # Extra command line parameters and the tabular output layout.
        self.params = ''
        self.outfmt = ("'6 qseqid sseqid pident length mismatch gapopen "
                       "qstart qend sstart send evalue bitscore qlen slen'")

        # Run the checks before a temporary output name is requested.
        self.binary_check()
        self.version_check()

        if not output:
            self.output = get_temporary_file_name()

        # Templates are filled in later through %-formatting with a mapping.
        self.search_cmd_tmpl = ("%(binary)s -query %(input)s -db %(target)s -out %(output)s "
                                "-outfmt %(outfmt)s %(params)s >> %(log)s 2>&1")
        self.makeblastdb_cmd_tmpl = ("%(makeblastdb)s -in %(target)s -dbtype nucl "
                                     ">> %(log)s 2>&1")

        self.results_dict = {}
Exemple #2
0
    def __init__(self, input_fasta, target, output=None, binary="blastn", makeblastdb="makeblastdb", log="/dev/null"):
        """Store the search settings and build the shell command templates.

        ``input_fasta`` and ``target`` fill the ``-query`` and ``-db`` slots of
        the search command (``target`` is also the ``-in`` of the makeblastdb
        command). ``binary`` and ``makeblastdb`` are the executables the two
        commands start with, and ``log`` is the file both commands append
        their stdout and stderr to. A falsy ``output`` is replaced with the
        result of ``get_temporary_file_name()`` once ``binary_check()`` and
        ``version_check()`` have been called.
        """
        self.input, self.target, self.output = input_fasta, target, output
        self.binary, self.makeblastdb = binary, makeblastdb
        self.log = log
        self.params = ''
        self.outfmt = (
            "'6 qseqid sseqid pident length mismatch gapopen "
            "qstart qend sstart send evalue bitscore qlen slen'"
        )

        self.binary_check()
        self.version_check()

        # Only ask for a temporary file name when the caller gave no output.
        self.output = output or get_temporary_file_name()

        self.search_cmd_tmpl = (
            "%(binary)s -query %(input)s -db %(target)s -out %(output)s "
            "-outfmt %(outfmt)s %(params)s >> %(log)s 2>&1"
        )
        self.makeblastdb_cmd_tmpl = (
            "%(makeblastdb)s -in %(target)s -dbtype nucl >> %(log)s 2>&1"
        )

        self.results_dict = {}
Exemple #3
0
def generate_exclusive_figures(_object):
    """Run the NMDS and heatmap R scripts once per sample-mapping category.

    For each category returned by ``get_sample_mapping_dict``, the samples
    that are both present in ``_object.samples`` and mapped to a truthy value
    are written to a temporary mapping file. Unless that sample list equals
    ``_object.samples``, filtered copies of the percent and count matrices are
    stored as well; the category is skipped when ``store_filtered_matrix``
    returns less than 3 for either of them. Every script is then run through
    ``run_command`` once per distance metric.

    Args:
        _object: analysis object providing ``sample_mapping``, ``samples``,
            ``tmp_directory``, the matrix and log file paths,
            ``figures_directory``, ``project``, ``logger``, ``progress`` and
            ``generate_output_destination``.

    Returns:
        dict: ``{category: {output_dir: {distance_metric: output_prefix}}}``.
        Skipped categories are present with an empty dict.
    """
    import Oligotyping
    scripts_dir_path = os.path.dirname(Oligotyping.__file__)
    exclusive_figures_dict = {}

    # Every analysis below is run with each of these metrics, in this order,
    # always on the percent matrix.
    distance_metrics = ("canberra", "kulczynski", "jaccard", "horn", "bray")

    sample_mapping_dict = get_sample_mapping_dict(_object.sample_mapping)

    for category in sample_mapping_dict:
        exclusive_figures_dict[category] = {}
        category_mapping = sample_mapping_dict[category]

        # double filter: first makes sure sample was not removed from the analysis due to losing all its reads during the
        # refinement, second makes sure that sample was actually mapped to something in the sample mapping file.
        # sorted() over a generator yields a real list on both Python 2 and 3
        # (filter() returns an iterator without .sort() on Python 3).
        samples = sorted(s for s in category_mapping
                         if s in _object.samples and category_mapping[s])

        mapping_file_path = get_temporary_file_name('%s-' % category,
                                                    '-mapping.txt',
                                                    _object.tmp_directory)
        # The context manager closes the handle even if a write fails.
        with open(mapping_file_path, 'w') as mapping_file:
            mapping_file.write('samples\t%s\n' % (category))
            for sample in samples:
                mapping_file.write('%s\t%s\n' %
                                   (sample, category_mapping[sample]))

        if samples == _object.samples:
            # Nothing was filtered out; the original matrices can be reused.
            matrix_percent_path = _object.matrix_percent_file_path
            matrix_count_path = _object.matrix_count_file_path
        else:
            matrix_percent_path = get_temporary_file_name(
                '%s-' % category, '-matrix-percent.txt', _object.tmp_directory)
            matrix_count_path = get_temporary_file_name(
                '%s-' % category, '-matrix-count.txt', _object.tmp_directory)

            if store_filtered_matrix(_object.matrix_percent_file_path,
                                     matrix_percent_path, samples) < 3:
                _object.logger.info("skipping exclusive figs for '%s'; less than 3 samples were left in MP"\
                                         % (category))
                continue
            if store_filtered_matrix(_object.matrix_count_file_path,
                                     matrix_count_path, samples) < 3:
                _object.logger.info("skipping exclusive figs for '%s'; less than 3 samples were left in MC"\
                                         % (category))
                continue

        # ready to roll.
        _object.logger.info("exclusive figs for '%s' with %d samples; mapping: '%s', MP: '%s', MC: '%s'"\
                             % (category, len(samples), mapping_file_path, matrix_percent_path, matrix_count_path))

        # NMDS
        for (analysis, script, output_dir) in [
            ('NMDS Analysis', '../Scripts/R/metaMDS-analysis-with-metadata.R',
             'nmds_analysis')
        ]:
            exclusive_figures_dict[category][output_dir] = {}

            target_dir = _object.generate_output_destination(
                '%s/%s/%s' % (os.path.basename(
                    _object.figures_directory), category, output_dir),
                directory=True)

            for distance_metric in distance_metrics:
                matrix_file = matrix_percent_path
                output_prefix = os.path.join(target_dir, distance_metric)
                cmd_line = (
                    '%s "%s" "%s" %s "%s" "%s" "%s" >> "%s" 2>&1' %
                    (os.path.join(scripts_dir_path, script), matrix_file,
                     mapping_file_path, distance_metric, category,
                     _object.project, output_prefix, _object.log_file_path))
                _object.progress.update('%s "%s" for "%s" ...' %
                                        (analysis, distance_metric, category))
                _object.logger.info('exclusive figure: %s' % (cmd_line))
                run_command(cmd_line)
                exclusive_figures_dict[category][output_dir][
                    distance_metric] = output_prefix

        # heatmap
        for (analysis, script, output_dir) in [
            ('Heatmap Analysis', '../Scripts/R/heatmap.R', 'heatmap_analysis')
        ]:
            exclusive_figures_dict[category][output_dir] = {}

            target_dir = _object.generate_output_destination(
                '%s/%s/%s' % (os.path.basename(
                    _object.figures_directory), category, output_dir),
                directory=True)

            for distance_metric in distance_metrics:
                matrix_file = matrix_percent_path
                output_prefix = os.path.join(target_dir, distance_metric)
                cmd_line = (
                    '%s "%s" -m "%s" -d %s --title "%s" -o "%s" >> "%s" 2>&1' %
                    (os.path.join(scripts_dir_path, script), matrix_file,
                     mapping_file_path, distance_metric, _object.project,
                     output_prefix, _object.log_file_path))
                _object.progress.update('%s "%s" for "%s" ...' %
                                        (analysis, distance_metric, category))
                _object.logger.info('exclusive figure: %s' % (cmd_line))
                run_command(cmd_line)
                exclusive_figures_dict[category][output_dir][
                    distance_metric] = output_prefix

    return exclusive_figures_dict
Exemple #4
0
def generate_exclusive_figures(_object):
    """Run the NMDS and heatmap R scripts once per sample-mapping category.

    For each category returned by ``get_sample_mapping_dict``, the samples
    that are both present in ``_object.samples`` and mapped to a truthy value
    are written to a temporary mapping file. Unless that sample list equals
    ``_object.samples``, filtered copies of the percent and count matrices are
    stored as well; the category is skipped when ``store_filtered_matrix``
    returns less than 3 for either of them. Every script is then run through
    ``run_command`` once per distance metric.

    Args:
        _object: analysis object providing ``sample_mapping``, ``samples``,
            ``tmp_directory``, the matrix and log file paths,
            ``figures_directory``, ``project``, ``logger``, ``progress`` and
            ``generate_output_destination``.

    Returns:
        dict: ``{category: {output_dir: {distance_metric: output_prefix}}}``.
        Skipped categories are present with an empty dict.
    """
    import Oligotyping

    scripts_dir_path = os.path.dirname(Oligotyping.__file__)
    exclusive_figures_dict = {}

    # Every analysis below is run with each of these metrics, in this order,
    # always on the percent matrix.
    distance_metrics = ("canberra", "kulczynski", "jaccard", "horn", "bray")

    sample_mapping_dict = get_sample_mapping_dict(_object.sample_mapping)

    for category in sample_mapping_dict:
        exclusive_figures_dict[category] = {}
        category_mapping = sample_mapping_dict[category]

        # double filter: first makes sure sample was not removed from the analysis due to losing all its reads during the
        #  refinement, second makes sure that sample was actually mapped to something in the sample mapping file.
        # sorted() over a generator yields a real list on both Python 2 and 3
        # (filter() returns an iterator without .sort() on Python 3).
        samples = sorted(s for s in category_mapping if s in _object.samples and category_mapping[s])

        mapping_file_path = get_temporary_file_name("%s-" % category, "-mapping.txt", _object.tmp_directory)
        # The context manager closes the handle even if a write fails.
        with open(mapping_file_path, "w") as mapping_file:
            mapping_file.write("samples\t%s\n" % (category))
            for sample in samples:
                mapping_file.write("%s\t%s\n" % (sample, category_mapping[sample]))

        if samples == _object.samples:
            # Nothing was filtered out; the original matrices can be reused.
            matrix_percent_path = _object.matrix_percent_file_path
            matrix_count_path = _object.matrix_count_file_path
        else:
            matrix_percent_path = get_temporary_file_name(
                "%s-" % category, "-matrix-percent.txt", _object.tmp_directory
            )
            matrix_count_path = get_temporary_file_name("%s-" % category, "-matrix-count.txt", _object.tmp_directory)

            if store_filtered_matrix(_object.matrix_percent_file_path, matrix_percent_path, samples) < 3:
                _object.logger.info(
                    "skipping exclusive figs for '%s'; less than 3 samples were left in MP" % (category)
                )
                continue
            if store_filtered_matrix(_object.matrix_count_file_path, matrix_count_path, samples) < 3:
                _object.logger.info(
                    "skipping exclusive figs for '%s'; less than 3 samples were left in MC" % (category)
                )
                continue

        # ready to roll.
        _object.logger.info(
            "exclusive figs for '%s' with %d samples; mapping: '%s', MP: '%s', MC: '%s'"
            % (category, len(samples), mapping_file_path, matrix_percent_path, matrix_count_path)
        )

        # NMDS
        for (analysis, script, output_dir) in [
            ("NMDS Analysis", "../Scripts/R/metaMDS-analysis-with-metadata.R", "nmds_analysis")
        ]:
            exclusive_figures_dict[category][output_dir] = {}

            target_dir = _object.generate_output_destination(
                "%s/%s/%s" % (os.path.basename(_object.figures_directory), category, output_dir), directory=True
            )

            for distance_metric in distance_metrics:
                matrix_file = matrix_percent_path
                output_prefix = os.path.join(target_dir, distance_metric)
                cmd_line = '%s -o "%s" -d "%s" -m "%s" --title "%s" "%s" "%s" >> "%s" 2>&1' % (
                    os.path.join(scripts_dir_path, script),
                    output_prefix,
                    distance_metric,
                    category,
                    _object.project,
                    matrix_file,
                    mapping_file_path,
                    _object.log_file_path,
                )
                _object.progress.update('%s "%s" for "%s" ...' % (analysis, distance_metric, category))
                _object.logger.info("exclusive figure: %s" % (cmd_line))
                run_command(cmd_line)
                exclusive_figures_dict[category][output_dir][distance_metric] = output_prefix

        # heatmap
        for (analysis, script, output_dir) in [("Heatmap Analysis", "../Scripts/R/heatmap.R", "heatmap_analysis")]:
            exclusive_figures_dict[category][output_dir] = {}

            target_dir = _object.generate_output_destination(
                "%s/%s/%s" % (os.path.basename(_object.figures_directory), category, output_dir), directory=True
            )

            for distance_metric in distance_metrics:
                matrix_file = matrix_percent_path
                output_prefix = os.path.join(target_dir, distance_metric)
                cmd_line = '%s "%s" -m "%s" -d %s --title "%s" -o "%s" >> "%s" 2>&1' % (
                    os.path.join(scripts_dir_path, script),
                    matrix_file,
                    mapping_file_path,
                    distance_metric,
                    _object.project,
                    output_prefix,
                    _object.log_file_path,
                )
                _object.progress.update('%s "%s" for "%s" ...' % (analysis, distance_metric, category))
                _object.logger.info("exclusive figure: %s" % (cmd_line))
                run_command(cmd_line)
                exclusive_figures_dict[category][output_dir][distance_metric] = output_prefix

    return exclusive_figures_dict