def main(environment_file,
         sample_mapping_file=None,
         unit_mapping_file=None,
         min_abundance=0,
         min_sum_normalized_percent=1):
    """Generate a GEXF network file from an environment file.

    The output is written next to ``environment_file``, with the file
    extension (if any) replaced by ``.gexf``.

    Args:
        environment_file: path of the environment file to read samples from.
        sample_mapping_file: optional path of a sample mapping file; when
            given, the parsed mapping is forwarded to the GEXF writer.
        unit_mapping_file: optional path of a unit mapping file; parsed with
            the same helper as the sample mapping and forwarded likewise.
        min_abundance: forwarded to ``utils.get_oligos_sorted_by_abundance``.
        min_sum_normalized_percent: accepted for interface compatibility;
            not used by this function.

    Returns:
        None. The result is the file written by
        ``utils.generate_gexf_network_file``.
    """
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(
        environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict,
                                                  min_abundance=min_abundance)
    # Only the percent table is needed to build the network.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)

    sample_mapping = None
    if sample_mapping_file:
        sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file)

    unit_mapping = None
    if unit_mapping_file:
        unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file)

    # splitext only strips a real extension of the last path component, so a
    # file name without any dot no longer collapses to just '.gexf'.
    output_file = os.path.splitext(environment_file)[0] + '.gexf'
    utils.generate_gexf_network_file(
        oligos,
        samples_dict,
        unit_percents,
        output_file,
        sample_mapping_dict=sample_mapping,
        unit_mapping_dict=unit_mapping)
def main(
    environment_file, sample_mapping_file=None, unit_mapping_file=None, min_abundance=0, min_sum_normalized_percent=1
):
    """Write a ``.gexf`` network description for the given environment file.

    Args:
        environment_file: path of the environment file; the output path is
            this path with its extension (if any) swapped for ``.gexf``.
        sample_mapping_file: optional sample mapping file path. If falsy, no
            sample mapping is passed to the GEXF generator.
        unit_mapping_file: optional unit mapping file path. If falsy, no unit
            mapping is passed to the GEXF generator.
        min_abundance: passed through to
            ``utils.get_oligos_sorted_by_abundance``.
        min_sum_normalized_percent: currently unused here; kept so existing
            callers that pass it keep working.

    Returns:
        None.
    """
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict, min_abundance=min_abundance)
    unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)[1]

    sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file) if sample_mapping_file else None
    unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file) if unit_mapping_file else None

    # Joining split('.')[:-1] produced a bare ".gexf" for names without a dot
    # and cut inside dotted directory names; splitext handles both cases.
    base_path, _extension = os.path.splitext(environment_file)
    output_file = base_path + ".gexf"

    utils.generate_gexf_network_file(
        oligos,
        samples_dict,
        unit_percents,
        output_file,
        sample_mapping_dict=sample_mapping,
        unit_mapping_dict=unit_mapping,
    )
# Example #3
# 0
def generate_exclusive_figures(_object):
    """Run the NMDS and heatmap R scripts separately for each mapping category.

    For every category of the sample mapping this writes a category-specific
    mapping file into the temporary directory, picks (or creates, via
    ``store_filtered_matrix``) the percent and count matrices restricted to
    the category's samples, and then runs each figure script once per
    distance metric.

    Args:
        _object: analysis object. Read here: ``sample_mapping``, ``samples``,
            ``tmp_directory``, ``matrix_percent_file_path``,
            ``matrix_count_file_path``, ``figures_directory``, ``project``,
            ``log_file_path``; used here: ``logger``, ``progress`` and
            ``generate_output_destination``.

    Returns:
        dict: ``{category: {output_dir: {distance_metric: output_prefix}}}``.
        A category that is skipped (``store_filtered_matrix`` returned less
        than 3) maps to an empty dict.
    """
    import Oligotyping
    scripts_dir_path = os.path.dirname(Oligotyping.__file__)
    # Both figure families are produced for the same set of metrics, always
    # from the percent matrix.
    distance_metrics = ("canberra", "kulczynski", "jaccard", "horn", "bray")
    exclusive_figures_dict = {}

    sample_mapping_dict = get_sample_mapping_dict(_object.sample_mapping)

    for category in sample_mapping_dict:
        exclusive_figures_dict[category] = {}
        category_mapping = sample_mapping_dict[category]

        # double filter: first makes sure sample was not removed from the
        # analysis due to losing all its reads during the refinement, second
        # makes sure that sample was actually mapped to something in the
        # sample mapping file. sorted() is used instead of filter().sort()
        # because filter() returns an iterator on Python 3.
        samples = sorted(
            s for s in category_mapping
            if s in _object.samples and category_mapping[s])

        mapping_file_path = get_temporary_file_name('%s-' % category,
                                                    '-mapping.txt',
                                                    _object.tmp_directory)
        with open(mapping_file_path, 'w') as mapping_file:
            mapping_file.write('samples\t%s\n' % (category))
            for sample in samples:
                mapping_file.write('%s\t%s\n' %
                                   (sample, category_mapping[sample]))

        if samples == _object.samples:
            # Every sample is covered; the full matrices can be used as is.
            matrix_percent_path = _object.matrix_percent_file_path
            matrix_count_path = _object.matrix_count_file_path
        else:
            matrix_percent_path = get_temporary_file_name(
                '%s-' % category, '-matrix-percent.txt', _object.tmp_directory)
            matrix_count_path = get_temporary_file_name(
                '%s-' % category, '-matrix-count.txt', _object.tmp_directory)

            if store_filtered_matrix(_object.matrix_percent_file_path,
                                     matrix_percent_path, samples) < 3:
                _object.logger.info("skipping exclusive figs for '%s'; less than 3 samples were left in MP"
                                    % (category))
                continue
            if store_filtered_matrix(_object.matrix_count_file_path,
                                     matrix_count_path, samples) < 3:
                _object.logger.info("skipping exclusive figs for '%s'; less than 3 samples were left in MC"
                                    % (category))
                continue

        # ready to roll.
        _object.logger.info("exclusive figs for '%s' with %d samples; mapping: '%s', MP: '%s', MC: '%s'"
                            % (category, len(samples), mapping_file_path, matrix_percent_path, matrix_count_path))

        # NOTE(review): the commands below are handed to run_command() as
        # shell strings (they rely on '>>' redirection); interpolated values
        # are only double-quoted, not escaped -- confirm inputs are trusted.
        for (analysis, script, output_dir) in [
            ('NMDS Analysis', '../Scripts/R/metaMDS-analysis-with-metadata.R',
             'nmds_analysis')
        ]:
            exclusive_figures_dict[category][output_dir] = {}

            target_dir = _object.generate_output_destination(
                '%s/%s/%s' % (os.path.basename(
                    _object.figures_directory), category, output_dir),
                directory=True)

            for distance_metric in distance_metrics:
                output_prefix = os.path.join(target_dir, distance_metric)
                cmd_line = (
                    '%s "%s" "%s" %s "%s" "%s" "%s" >> "%s" 2>&1' %
                    (os.path.join(scripts_dir_path, script),
                     matrix_percent_path, mapping_file_path, distance_metric,
                     category, _object.project, output_prefix,
                     _object.log_file_path))
                _object.progress.update('%s "%s" for "%s" ...' %
                                        (analysis, distance_metric, category))
                _object.logger.info('exclusive figure: %s' % (cmd_line))
                run_command(cmd_line)
                exclusive_figures_dict[category][output_dir][
                    distance_metric] = output_prefix

        # heatmap
        for (analysis, script, output_dir) in [
            ('Heatmap Analysis', '../Scripts/R/heatmap.R', 'heatmap_analysis')
        ]:
            exclusive_figures_dict[category][output_dir] = {}

            target_dir = _object.generate_output_destination(
                '%s/%s/%s' % (os.path.basename(
                    _object.figures_directory), category, output_dir),
                directory=True)

            for distance_metric in distance_metrics:
                output_prefix = os.path.join(target_dir, distance_metric)
                cmd_line = (
                    '%s "%s" -m "%s" -d %s --title "%s" -o "%s" >> "%s" 2>&1' %
                    (os.path.join(scripts_dir_path, script),
                     matrix_percent_path, mapping_file_path, distance_metric,
                     _object.project, output_prefix, _object.log_file_path))
                _object.progress.update('%s "%s" for "%s" ...' %
                                        (analysis, distance_metric, category))
                _object.logger.info('exclusive figure: %s' % (cmd_line))
                run_command(cmd_line)
                exclusive_figures_dict[category][output_dir][
                    distance_metric] = output_prefix

    return exclusive_figures_dict
# Example #4
# 0
def generate_exclusive_figures(_object):
    """Produce category-exclusive NMDS and heatmap figures.

    Each category found in the sample mapping is processed independently:
    a mapping file limited to that category's usable samples is written to
    the temporary directory, matrices restricted to those samples are
    prepared when needed, and the NMDS and heatmap R scripts are invoked
    once per distance metric.

    Args:
        _object: analysis object providing ``sample_mapping``, ``samples``,
            ``tmp_directory``, ``matrix_percent_file_path``,
            ``matrix_count_file_path``, ``figures_directory``, ``project``,
            ``log_file_path``, ``logger``, ``progress`` and
            ``generate_output_destination``.

    Returns:
        dict: nested mapping
        ``{category: {output_dir: {distance_metric: output_prefix}}}``; the
        inner dict stays empty for categories skipped because
        ``store_filtered_matrix`` returned less than 3.
    """
    import Oligotyping

    scripts_dir_path = os.path.dirname(Oligotyping.__file__)
    nmds_script = os.path.join(scripts_dir_path, "../Scripts/R/metaMDS-analysis-with-metadata.R")
    heatmap_script = os.path.join(scripts_dir_path, "../Scripts/R/heatmap.R")
    figures_base = os.path.basename(_object.figures_directory)
    metrics = ("canberra", "kulczynski", "jaccard", "horn", "bray")

    exclusive_figures_dict = {}
    sample_mapping_dict = get_sample_mapping_dict(_object.sample_mapping)

    for category, mapped_values in sample_mapping_dict.items():
        figures_for_category = exclusive_figures_dict[category] = {}

        # Keep a sample only if it survived refinement (still listed in
        # _object.samples) and is mapped to a non-empty value. Building the
        # list with sorted() works on Python 3, where filter() yields an
        # iterator that has no .sort().
        samples = sorted(name for name in mapped_values if name in _object.samples and mapped_values[name])

        mapping_file_path = get_temporary_file_name("%s-" % category, "-mapping.txt", _object.tmp_directory)
        # The context manager closes the file even if a write fails.
        with open(mapping_file_path, "w") as mapping_file:
            mapping_file.write("samples\t%s\n" % (category))
            for name in samples:
                mapping_file.write("%s\t%s\n" % (name, mapped_values[name]))

        if samples == _object.samples:
            matrix_percent_path = _object.matrix_percent_file_path
            matrix_count_path = _object.matrix_count_file_path
        else:
            matrix_percent_path = get_temporary_file_name(
                "%s-" % category, "-matrix-percent.txt", _object.tmp_directory
            )
            matrix_count_path = get_temporary_file_name("%s-" % category, "-matrix-count.txt", _object.tmp_directory)

            if store_filtered_matrix(_object.matrix_percent_file_path, matrix_percent_path, samples) < 3:
                _object.logger.info(
                    "skipping exclusive figs for '%s'; less than 3 samples were left in MP" % (category)
                )
                continue
            if store_filtered_matrix(_object.matrix_count_file_path, matrix_count_path, samples) < 3:
                _object.logger.info(
                    "skipping exclusive figs for '%s'; less than 3 samples were left in MC" % (category)
                )
                continue

        # ready to roll.
        _object.logger.info(
            "exclusive figs for '%s' with %d samples; mapping: '%s', MP: '%s', MC: '%s'"
            % (category, len(samples), mapping_file_path, matrix_percent_path, matrix_count_path)
        )

        # NOTE(review): run_command() receives shell strings (they use '>>'
        # redirection) whose interpolated values are double-quoted but not
        # escaped -- confirm category/project/path values are trusted.

        # NMDS
        analysis, output_dir = "NMDS Analysis", "nmds_analysis"
        figures_for_category[output_dir] = {}
        target_dir = _object.generate_output_destination(
            "%s/%s/%s" % (figures_base, category, output_dir), directory=True
        )
        for distance_metric in metrics:
            output_prefix = os.path.join(target_dir, distance_metric)
            cmd_line = '%s -o "%s" -d "%s" -m "%s" --title "%s" "%s" "%s" >> "%s" 2>&1' % (
                nmds_script,
                output_prefix,
                distance_metric,
                category,
                _object.project,
                matrix_percent_path,
                mapping_file_path,
                _object.log_file_path,
            )
            _object.progress.update('%s "%s" for "%s" ...' % (analysis, distance_metric, category))
            _object.logger.info("exclusive figure: %s" % (cmd_line))
            run_command(cmd_line)
            figures_for_category[output_dir][distance_metric] = output_prefix

        # heatmap
        analysis, output_dir = "Heatmap Analysis", "heatmap_analysis"
        figures_for_category[output_dir] = {}
        target_dir = _object.generate_output_destination(
            "%s/%s/%s" % (figures_base, category, output_dir), directory=True
        )
        for distance_metric in metrics:
            output_prefix = os.path.join(target_dir, distance_metric)
            cmd_line = '%s "%s" -m "%s" -d %s --title "%s" -o "%s" >> "%s" 2>&1' % (
                heatmap_script,
                matrix_percent_path,
                mapping_file_path,
                distance_metric,
                _object.project,
                output_prefix,
                _object.log_file_path,
            )
            _object.progress.update('%s "%s" for "%s" ...' % (analysis, distance_metric, category))
            _object.logger.info("exclusive figure: %s" % (cmd_line))
            run_command(cmd_line)
            figures_for_category[output_dir][distance_metric] = output_prefix

    return exclusive_figures_dict