def parse_counts(biom_file, tree_file=None):
    """Build one ``Count`` record for every nonzero entry of a BIOM table.

    The table is loaded with ``biom.load_table``. For each
    ``(obs_id, samp_id)`` pair yielded by ``table.nonzero()`` the lineage is
    looked up with ``parse_lineage``; if that lookup comes back empty and a
    tree was supplied, the lookup is retried against the parsed tree with
    ``index_only=True``.

    Args:
        biom_file: Whatever ``biom.load_table`` accepts (presumably a path
            to a BIOM file -- confirm against callers).
        tree_file: Optional. When truthy it is handed to ``ParsedTree`` as a
            ``'newick'`` source and used as a fallback for lineage lookup.
            When omitted, no tree fallback is attempted.

    Returns:
        A list of ``Count`` objects (constructed with ``samp_id``,
        ``obs_id``, ``count``, ``seq_var`` and ``lineage``), in the order
        produced by ``table.nonzero()``.
    """
    counts = []
    table = biom.load_table(biom_file)

    # Bind ``tree`` up front: it is tested inside the loop below, and leaving
    # it unassigned when no tree file is given raised UnboundLocalError as
    # soon as an observation had no lineage in the table.
    tree = None
    if tree_file:
        tree = ParsedTree(
            tree_file,
            'newick',
            taxon_name_re,
            lineage_prefixes=['k__', 'p__', 'c__', 'o__', 'f__', 'g__', 's__'],
        )
        # NOTE(review): presumably indexes clades whose names look like
        # nucleotide sequences -- confirm against ParsedTree.
        tree.set_index_clades(contains=['A', 'T', 'G', 'C'])

    for obs_id, samp_id in table.nonzero():
        lineage = parse_lineage(table, obs_id)
        # Fall back to the tree only when the table itself gave no lineage.
        if not lineage and tree:
            lineage = parse_lineage(
                table, obs_id, parsed_tree=tree, index_only=True)
        seq_var = parse_sequencing_variant(obs_id)
        count = table.get_value_by_ids(obs_id, samp_id)
        counts.append(
            Count(samp_id=samp_id,
                  obs_id=obs_id,
                  count=count,
                  seq_var=seq_var,
                  lineage=lineage))
    return counts
with session_scope() as session_2: for exp_id, experiment in experiments.items(): for subject in experiment.subjects: for sample in subject.samples: for prep in sample.preparations: for workflow in prep.workflows: try: workflow.count_dict except AttributeError: continue for count in workflow.count_dict[sample.orig_sample_id]: fact = Count(experiment=experiment, subject=subject, sample=sample, sample_site=sample.sampling_site, sample_time=sample.sampling_time, preparation=prep, workflow=workflow, lineage=count.lineage, seq_variant=count.seq_var, count=count.count) session_2.add(fact) end = time.time() print("Main loop took: ", end-start) def parser(session): """Parse individual prep and BIOM files when parsing a study. Note: This method will produce duplicates of sample, subject, and processing data each time a BIOM file is inserted. It also unfortunately duplicates count data (for each processing in the prep data file). This