def main(environment_file, sample_mapping_file=None, unit_mapping_file=None,
         min_abundance=0, min_sum_normalized_percent=1):
    """Build a GEXF network file from an environment file.

    The output path is the input path with its extension replaced by
    ``.gexf`` (``.gexf`` is appended when the input has no extension); it is
    handed to ``utils.generate_gexf_network_file`` as the output file.

    Args:
        environment_file: path of the environment file to read.
        sample_mapping_file: optional path; when truthy it is loaded with
            ``utils.get_sample_mapping_dict`` and passed on as
            ``sample_mapping_dict``, otherwise ``None`` is passed.
        unit_mapping_file: optional path; loaded with the same helper and
            passed on as ``unit_mapping_dict``, otherwise ``None`` is passed.
        min_abundance: forwarded to ``utils.get_oligos_sorted_by_abundance``.
        min_sum_normalized_percent: accepted for interface compatibility;
            this function does not use it.
    """
    # Function-scope import so the block does not depend on the file's
    # import section already providing ``os``.
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict, min_abundance=min_abundance)

    # Only the percents are consumed below; the counts are discarded.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)

    # Bind both mappings unconditionally so the call below never has to guard
    # against a name that was only assigned inside an ``if``.
    sample_mapping = None
    if sample_mapping_file:
        sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file)

    unit_mapping = None
    if unit_mapping_file:
        unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file)

    # splitext only strips an extension from the final path component, so an
    # extensionless input no longer collapses to '.gexf' and dots in directory
    # names are left alone.
    output_file = os.path.splitext(environment_file)[0] + '.gexf'

    utils.generate_gexf_network_file(oligos,
                                     samples_dict,
                                     unit_percents,
                                     output_file,
                                     sample_mapping_dict=sample_mapping,
                                     unit_mapping_dict=unit_mapping)
def main(
    environment_file, sample_mapping_file=None, unit_mapping_file=None, min_abundance=0, min_sum_normalized_percent=1
):
    """Generate a ``.gexf`` network file for the given environment file.

    The output file name is derived from ``environment_file`` by swapping its
    extension for ``.gexf``; when there is no extension, ``.gexf`` is simply
    appended. The name is passed to ``utils.generate_gexf_network_file``.

    Args:
        environment_file: path of the environment file to read.
        sample_mapping_file: optional path. If truthy, it is read with
            ``utils.get_sample_mapping_dict`` and supplied as
            ``sample_mapping_dict``; otherwise ``None`` is supplied.
        unit_mapping_file: optional path. If truthy, it is read with the same
            helper and supplied as ``unit_mapping_dict``; otherwise ``None``.
        min_abundance: forwarded to ``utils.get_oligos_sorted_by_abundance``.
        min_sum_normalized_percent: kept for interface compatibility; not
            used by this function.
    """
    # Imported here so this block works regardless of what the file's
    # top-level import section provides.
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict, min_abundance=min_abundance)

    # The helper returns (counts, percents); only the percents are needed.
    unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)[1]

    sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file) if sample_mapping_file else None
    unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file) if unit_mapping_file else None

    # Splitting on every "." turned an extensionless path into an empty stem
    # (output ".gexf") and cut paths at dots inside directory names;
    # os.path.splitext only looks at the last path component.
    stem, _extension = os.path.splitext(environment_file)
    output_file = stem + ".gexf"

    utils.generate_gexf_network_file(
        oligos,
        samples_dict,
        unit_percents,
        output_file,
        sample_mapping_dict=sample_mapping,
        unit_mapping_dict=unit_mapping,
    )
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_units_across_samples_dicts
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.cosine_similarity import get_oligotype_sets
from Oligotyping.utils.cosine_similarity import get_oligotype_sets_greedy
from Oligotyping.visualization.oligotype_distribution_stack_bar import oligotype_distribution_stack_bar
from Oligotyping.utils.utils import generate_ENVIRONMENT_file

# NOTE(review): `sys` and `get_samples_dict_from_environment_file` are used
# below but their imports are not part of this excerpt -- presumably they are
# imported earlier in the file; confirm.

# Command line arguments: <input file path> <cosine similarity value>
input_file_path = sys.argv[1]
cosine_similarity_value = float(sys.argv[2])

# Output names are the input path plus a suffix that embeds the similarity
# value ('%' binds tighter than '+', so only the suffix is formatted).
sets_output_file_name = input_file_path + '-cos-%s-SETS' % cosine_similarity_value
environ_output_file_name = input_file_path + '-cos-%s-SETS-ENVIRON' % cosine_similarity_value

samples_dict = get_samples_dict_from_environment_file(input_file_path)
oligos = get_oligos_sorted_by_abundance(samples_dict)
unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict)
samples = samples_dict.keys()

across_samples_sum_normalized, across_samples_max_normalized = get_units_across_samples_dicts(
    oligos, samples_dict.keys(), unit_percents)

# The greedy variant also receives the SETS output file name.
oligotype_sets = get_oligotype_sets_greedy(oligos,
                                           across_samples_sum_normalized,
                                           cosine_similarity_value,
                                           sets_output_file_name)

# Parenthesised single-argument form: prints the same text under Python 2 and
# is valid syntax under Python 3 (the bare print statement is not).
print('%d sets from %d units' % (len(oligotype_sets), len(oligos)))

# Start every sample with an empty dict of agglomerated oligos.
samples_dict_with_agglomerated_oligos = {}
for sample in samples:
    samples_dict_with_agglomerated_oligos[sample] = {}
# -*- coding: utf-8 -*-
# Takes an environment file and generates the matching count and percent
# matrices.
import sys

from Oligotyping.utils.utils import get_samples_dict_from_environment_file
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_units_across_samples_dicts
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.utils import generate_MATRIX_files

# The only command line argument is the environment file path; both output
# names are that path plus a fixed suffix.
environment_file_path = sys.argv[1]
count_matrix_path = environment_file_path + '-MATRIX-COUNT'
percent_matrix_path = environment_file_path + '-MATRIX-PERCENT'

samples_dict = get_samples_dict_from_environment_file(environment_file_path)

# Reverse the order returned by the abundance sort, in place.
oligos = get_oligos_sorted_by_abundance(samples_dict)
oligos.reverse()

unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict)

# Sample names in sorted order.
samples = list(samples_dict.keys())
samples.sort()

generate_MATRIX_files(
    oligos,
    samples,
    unit_counts,
    unit_percents,
    count_matrix_path,
    percent_matrix_path,
)