def main(environment_file,
         sample_mapping_file=None,
         unit_mapping_file=None,
         min_abundance=0,
         min_sum_normalized_percent=1):
    """Build a GEXF network file from an environment file.

    The samples dictionary is loaded from ``environment_file``, the oligos
    are obtained from ``utils.get_oligos_sorted_by_abundance`` and the unit
    percents from ``utils.get_unit_counts_and_percents``; the results are
    handed to ``utils.generate_gexf_network_file``.

    Args:
        environment_file: path of the environment file. The output path
            passed on is this path with its extension (if any) replaced
            by ``.gexf``.
        sample_mapping_file: optional path. When truthy it is loaded with
            ``utils.get_sample_mapping_dict`` and passed on as
            ``sample_mapping_dict``; otherwise ``None`` is passed.
        unit_mapping_file: optional path, loaded with the same helper and
            passed on as ``unit_mapping_dict``.
        min_abundance: forwarded to ``utils.get_oligos_sorted_by_abundance``.
        min_sum_normalized_percent: accepted for interface compatibility;
            this function does not use it.
    """
    import os  # local import keeps the block self-contained

    samples_dict = utils.get_samples_dict_from_environment_file(
        environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict,
                                                  min_abundance=min_abundance)
    # Only the percents are consumed below; the counts are discarded.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos,
                                                          samples_dict)

    # Always bind both names so no conditionally-defined variable is read.
    sample_mapping = None
    if sample_mapping_file:
        sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file)

    unit_mapping = None
    if unit_mapping_file:
        unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file)

    # splitext only strips an extension from the final path component, so a
    # path without a dot keeps its full name (instead of collapsing to
    # '.gexf') and dots in directory names are left alone.
    output_file = os.path.splitext(environment_file)[0] + '.gexf'

    utils.generate_gexf_network_file(
        oligos,
        samples_dict,
        unit_percents,
        output_file,
        sample_mapping_dict=sample_mapping,
        unit_mapping_dict=unit_mapping)
def main(
    environment_file, sample_mapping_file=None, unit_mapping_file=None, min_abundance=0, min_sum_normalized_percent=1
):
    """Generate a GEXF network file for the given environment file.

    Loads the samples dictionary from ``environment_file``, fetches the
    oligos via ``utils.get_oligos_sorted_by_abundance`` and the unit percents
    via ``utils.get_unit_counts_and_percents``, then calls
    ``utils.generate_gexf_network_file`` with an output path derived from
    ``environment_file``.

    Args:
        environment_file: path of the environment file; its extension (if
            any) is replaced by ``.gexf`` to form the output path.
        sample_mapping_file: optional path; when truthy, loaded with
            ``utils.get_sample_mapping_dict`` and passed as
            ``sample_mapping_dict`` (``None`` otherwise).
        unit_mapping_file: optional path; when truthy, loaded with the same
            helper and passed as ``unit_mapping_dict`` (``None`` otherwise).
        min_abundance: forwarded to ``utils.get_oligos_sorted_by_abundance``.
        min_sum_normalized_percent: kept for interface compatibility; not
            used by this function.
    """
    import os  # local import keeps the block self-contained

    samples_dict = utils.get_samples_dict_from_environment_file(environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict, min_abundance=min_abundance)
    # The counts returned alongside the percents are not needed here.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)

    sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file) if sample_mapping_file else None
    unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file) if unit_mapping_file else None

    # os.path.splitext handles paths with no extension (the old split/join
    # produced a bare ".gexf") and ignores dots inside directory names.
    output_file = os.path.splitext(environment_file)[0] + ".gexf"

    utils.generate_gexf_network_file(
        oligos,
        samples_dict,
        unit_percents,
        output_file,
        sample_mapping_dict=sample_mapping,
        unit_mapping_dict=unit_mapping,
    )
Example #3
0
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_units_across_samples_dicts
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.cosine_similarity import get_oligotype_sets
from Oligotyping.utils.cosine_similarity import get_oligotype_sets_greedy
from Oligotyping.visualization.oligotype_distribution_stack_bar import oligotype_distribution_stack_bar
from Oligotyping.utils.utils import generate_ENVIRONMENT_file

# Command-line arguments: the input (environment) file path and the cosine
# similarity value, which is parsed as a float.
# NOTE(review): `sys` and `get_samples_dict_from_environment_file` are not
# imported in the visible part of this script -- confirm they are imported
# above.
input_file_path = sys.argv[1]
cosine_similarity_value = float(sys.argv[2])

# Output names are the input path plus a suffix carrying the cosine value.
sets_output_file_name = input_file_path + '-cos-%s-SETS' % cosine_similarity_value
environ_output_file_name = input_file_path + '-cos-%s-SETS-ENVIRON' % cosine_similarity_value

samples_dict = get_samples_dict_from_environment_file(input_file_path)
oligos = get_oligos_sorted_by_abundance(samples_dict)
unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict)
samples = samples_dict.keys()

across_samples_sum_normalized, across_samples_max_normalized = get_units_across_samples_dicts(
    oligos, samples_dict.keys(), unit_percents)

# Only the sum-normalized values are used to build the sets.
oligotype_sets = get_oligotype_sets_greedy(oligos,
                                           across_samples_sum_normalized,
                                           cosine_similarity_value,
                                           sets_output_file_name)

# Parenthesised single-argument form: prints the same text under Python 2
# and is valid syntax under Python 3 (the bare print statement is not).
print('%d sets from %d units' % (len(oligotype_sets), len(oligos)))

samples_dict_with_agglomerated_oligos = {}

# Give every sample its own empty dict.
for sample in samples:
    samples_dict_with_agglomerated_oligos[sample] = {}
# -*- coding: utf-8 -*-
# Takes an environment file and generates matching percent and count matrices.

import sys
from Oligotyping.utils.utils import get_samples_dict_from_environment_file
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_units_across_samples_dicts
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.utils import generate_MATRIX_files

# The environment file path is the first command-line argument; it is also
# the prefix of both output matrix file names.
environment_path = sys.argv[1]
samples_dict = get_samples_dict_from_environment_file(environment_path)

# Flip, in place, the order returned by get_oligos_sorted_by_abundance.
oligos = get_oligos_sorted_by_abundance(samples_dict)
oligos.reverse()

unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict)

# Sample names in sorted order.
samples = list(samples_dict.keys())
samples.sort()

count_matrix_path = environment_path + '-MATRIX-COUNT'
percent_matrix_path = environment_path + '-MATRIX-PERCENT'
generate_MATRIX_files(oligos,
                      samples,
                      unit_counts,
                      unit_percents,
                      count_matrix_path,
                      percent_matrix_path)