Beispiel #1
0
def parse_counts(biom_file, tree_file=None):
    """Build one ``Count`` record per nonzero cell of a BIOM table.

    Args:
        biom_file: Table source handed unchanged to ``biom.load_table``.
        tree_file: Optional Newick tree file. When given, it is parsed into
            a ``ParsedTree`` and used as a fallback lineage source for
            observations whose lineage cannot be read from the table.

    Returns:
        list: ``Count`` objects in the order produced by ``table.nonzero()``.
    """
    table = biom.load_table(biom_file)

    # Always bind ``tree``: the fallback check inside the loop reads it even
    # when no tree file was supplied, which previously raised
    # UnboundLocalError as soon as an observation had no table lineage.
    tree = None
    if tree_file:
        tree = ParsedTree(
            tree_file,
            'newick',
            taxon_name_re,
            lineage_prefixes=['k__', 'p__', 'c__', 'o__', 'f__', 'g__', 's__'])
        # NOTE(review): presumably indexes clades whose names look like
        # nucleotide sequences -- confirm against ParsedTree.
        tree.set_index_clades(contains=['A', 'T', 'G', 'C'])

    counts = []
    for obs_id, samp_id in table.nonzero():
        # Prefer the lineage available from the table itself.
        lineage = parse_lineage(table, obs_id)
        if not lineage and tree:
            # Fall back to the tree only when the table gave nothing.
            lineage = parse_lineage(table,
                                    obs_id,
                                    parsed_tree=tree,
                                    index_only=True)
        seq_var = parse_sequencing_variant(obs_id)
        count = table.get_value_by_ids(obs_id, samp_id)
        counts.append(
            Count(samp_id=samp_id,
                  obs_id=obs_id,
                  count=count,
                  seq_var=seq_var,
                  lineage=lineage))
    return counts
Beispiel #2
0
    # Walk experiment -> subject -> sample -> preparation -> workflow and add
    # one ``Count`` fact to the session for every parsed count entry.
    with session_scope() as session_2:
        for exp_id, experiment in experiments.items():
            for subject in experiment.subjects:
                for sample in subject.samples:
                    for prep in sample.preparations:
                        for workflow in prep.workflows:
                            # Attribute probe: workflows that have no
                            # ``count_dict`` are skipped.
                            try:
                                workflow.count_dict
                            except AttributeError:
                                continue
                            # Only the entries stored under this sample's
                            # original ID are turned into facts.
                            for count in workflow.count_dict[sample.orig_sample_id]:
                                fact = Count(experiment=experiment,
                                             subject=subject,
                                             sample=sample,
                                             sample_site=sample.sampling_site,
                                             sample_time=sample.sampling_time,
                                             preparation=prep,
                                             workflow=workflow,
                                             lineage=count.lineage,
                                             seq_variant=count.seq_var,
                                             count=count.count)
                                session_2.add(fact)
    # Report elapsed wall-clock time; ``start`` is expected to have been set
    # earlier in the enclosing function (not visible in this excerpt).
    end = time.time()
    print("Main loop took: ", end-start)


def parser(session):
	"""Parse individual prep and BIOM files when parsing a study.
	
	Note: This method will produce duplicates of sample, subject, and
	processing data each time a BIOM file is inserted. It also unfortunately
	duplicates count data (for each processing in the prep data file). This