def __init__(self, config_file, mutation_ids, attribute_map, precision=False):
    """Store the configuration and the mutation/attribute selections.

    Parameters
    ----------
    config_file
        Passed to ``paths.get_sample_ids`` to obtain the sample ids; also
        kept on the instance as ``config_file``.
    mutation_ids
        Stored unchanged as ``mutation_ids``.
    attribute_map
        Stored unchanged as ``attribute_map``.
    precision
        Stored as ``update_precision`` (defaults to ``False``).
    """
    self.config_file = config_file

    # Sample ids are resolved once, at construction time, from the config.
    sample_ids = paths.get_sample_ids(config_file)
    self.sample_ids = sample_ids

    self.mutation_ids = mutation_ids
    self.attribute_map = attribute_map

    # NOTE: the constructor argument is ``precision`` but the attribute is
    # named ``update_precision``.
    self.update_precision = precision
def _load_variant_allele_frequencies(config_file):
    """Load the per-sample variant allele frequency tables as one frame.

    Every ``(sample_id, file_name)`` pair from
    ``paths.get_mutations_files(config_file)`` is loaded with
    ``_load_sample_variant_allele_frequencies``, tagged with a ``sample_id``
    column, and the resulting tables are stacked row-wise.

    Only ``mutation_id`` groups whose row count equals the number of sample
    ids in the config are kept. This selects mutations present in all
    samples, presumably relying on each sample table holding one row per
    mutation (to be confirmed against the per-sample loader).

    Parameters
    ----------
    config_file
        Configuration handle accepted by the ``paths`` helpers.

    Returns
    -------
    The filtered, concatenated table.
    """
    mutations_files = paths.get_mutations_files(config_file)

    per_sample_frames = []
    for sample_id, file_name in mutations_files.items():
        frame = _load_sample_variant_allele_frequencies(file_name)
        # Tag the rows so they remain attributable after concatenation.
        frame['sample_id'] = sample_id
        per_sample_frames.append(frame)

    combined = pd.concat(per_sample_frames, axis=0)

    expected_rows = len(paths.get_sample_ids(config_file))

    def _has_row_per_sample(group):
        # A group is kept only when it has as many rows as there are samples.
        return len(group) == expected_rows

    # Filter for mutations in all samples
    return combined.groupby('mutation_id').filter(_has_row_per_sample)
def _load_cellular_prevalences(config_file, burnin, thin):
    """Load the per-sample cellular prevalence tables as one frame.

    Every ``(sample_id, file_name)`` pair from
    ``paths.get_cellular_prevalence_trace_files(config_file)`` is loaded with
    ``_load_sample_cellular_prevalences(file_name, burnin, thin)``, tagged
    with a ``sample_id`` column, and the resulting tables are stacked
    row-wise.

    Only ``mutation_id`` groups whose row count equals the number of sample
    ids in the config are kept. This selects mutations present in all
    samples, presumably relying on each sample table holding one row per
    mutation (to be confirmed against the per-sample loader).

    Parameters
    ----------
    config_file
        Configuration handle accepted by the ``paths`` helpers.
    burnin
        Forwarded unchanged to ``_load_sample_cellular_prevalences``.
    thin
        Forwarded unchanged to ``_load_sample_cellular_prevalences``.

    Returns
    -------
    The filtered, concatenated table.
    """
    trace_files = paths.get_cellular_prevalence_trace_files(config_file)

    per_sample_frames = []
    for sample_id, file_name in trace_files.items():
        frame = _load_sample_cellular_prevalences(file_name, burnin, thin)
        # Tag the rows so they remain attributable after concatenation.
        frame['sample_id'] = sample_id
        per_sample_frames.append(frame)

    combined = pd.concat(per_sample_frames, axis=0)

    expected_rows = len(paths.get_sample_ids(config_file))

    def _has_row_per_sample(group):
        # A group is kept only when it has as many rows as there are samples.
        return len(group) == expected_rows

    # Filter for mutations in all samples
    return combined.groupby('mutation_id').filter(_has_row_per_sample)