Ejemplo n.º 1
0
def _read_inputs(biom_table: biom.Table, phylogeny_fp: NewickFormat,
                 meta_data: NumericMetadataColumn = None):
    """Align the feature table, the labels and the phylogeny.

    Parameters
    ----------
    biom_table : biom.Table
        Feature table; its samples are re-ordered to follow the label
        order before any further processing.
    phylogeny_fp : NewickFormat
        Newick phylogeny. It is handed to
        ``_prune_features_from_phylogeny`` together with the processed
        table, and the path returned by that helper is loaded with
        dendropy.
    meta_data : NumericMetadataColumn, optional
        Per-sample values. When supplied, the strategy is ``"balancing"``
        and the labels are the first column of the frame returned by
        ``_sort_metada``. When omitted, the strategy is
        ``"augmentation"`` and every sample gets the label ``1.0``.

    Returns
    -------
    tuple
        ``(table, y, tree, generate_strategy, pruned_phylogeny_fp)``.

    Raises
    ------
    ValueError
        If the sample IDs of the processed table do not match the label
        index (same IDs, same order).
    """
    # Explicit None test: the argument is optional, and truthiness of an
    # arbitrary metadata object is not a reliable "was it passed" signal.
    if meta_data is not None:
        generate_strategy = "balancing"
        meta, biom_table = _sort_metada(meta_data, biom_table)
        y = meta.iloc[:, 0]
        samples = meta.index
    else:
        generate_strategy = "augmentation"
        samples = biom_table.ids('sample')
        y = pd.Series(data=np.ones((len(samples),)), index=samples)

    _table_tmp = biom_table.sort_order(axis='sample', order=samples)
    _table = _map_observations(_table_tmp)
    pruned_phylogeny_fp = _prune_features_from_phylogeny(_table, phylogeny_fp)
    _tree = dendropy.Tree.get(path=str(pruned_phylogeny_fp),
                              preserve_underscores=False,
                              schema="newick", rooting='default-rooted')

    # Compare as plain sequences so that a length mismatch is reported as
    # a mismatch instead of failing inside an elementwise comparison.
    table_samples = _table.ids('sample')
    if list(samples) != list(table_samples):
        difference = set(samples) - set(table_samples)
        # Build one string: passing several positional arguments to
        # ValueError makes the message render as a tuple.
        raise ValueError(
            "The samples IDs in meta data and biom table are not the same! "
            "The difference is: {}. Please double check.".format(difference))

    return _table, y, _tree, generate_strategy, pruned_phylogeny_fp
Ejemplo n.º 2
0
def plot(output_dir,
         table: biom.Table,
         metadata: q2.Metadata,
         case_where: str,
         control_where: str,
         feature_tree: skbio.TreeNode = None):
    """Write the static plot assets plus table and tree data to a directory.

    Parameters
    ----------
    output_dir
        Destination directory. The contents of ``PLOT/assets/dist`` are
        copied into it and a ``data`` sub-directory is created.
    table : biom.Table
        Feature table. It is filtered to the case/control samples and
        stripped of empty observations; those two calls do not assign
        their result, so the passed-in table is modified.
    metadata : q2.Metadata
        Sample metadata, restricted to the samples present in ``table``.
    case_where, control_where : str
        Selection expressions passed to ``metadata.get_ids`` to pick the
        case and control samples.
    feature_tree : skbio.TreeNode, optional
        Tree over the features. When omitted, an empty ``TreeNode`` is
        used instead.

    Notes
    -----
    Writes ``data/packed_table.jsonp`` and ``data/tree.jsonp`` under
    ``output_dir``. When a tree is supplied it is also dumped to
    ``/tmp/tree.nwk``.
    """
    # The tree is optional (default None), so only dump it when present;
    # an unconditional write raised AttributeError for the default value.
    # NOTE(review): the hard-coded /tmp path looks like a debugging
    # leftover - confirm whether this dump is still needed.
    if feature_tree is not None:
        with open('/tmp/tree.nwk', 'w') as fh:
            feature_tree.write(fh)

    copy_tree(os.path.join(PLOT, 'assets', 'dist'), output_dir)
    data_dir = os.path.join(output_dir, 'data')
    os.mkdir(data_dir)

    metadata = metadata.filter_ids(table.ids(axis='sample'))
    case_samples = sorted(metadata.get_ids(case_where))
    control_samples = sorted(metadata.get_ids(control_where))
    selected_samples = case_samples + control_samples

    # Results are intentionally not assigned: this relies on the table
    # being modified in place (biom's default for these methods).
    table.filter(selected_samples)
    table.remove_empty('observation')
    features = list(table.ids(axis='observation'))

    if feature_tree is not None:
        feature_tree = shear_no_prune(feature_tree, features)
    else:
        feature_tree = TreeNode()

    tree_data = tree_to_array(feature_tree)
    # Positions whose 'children' entry is 0 are used as the tips; map each
    # such position (as a string key) to its rank among them.
    idx, = np.where(np.asarray(tree_data['children']) == 0)
    tree_data['lookup'] = dict(zip(map(str, idx), range(len(idx))))

    # Order observations by tip order, and samples cases-first.
    tip_order = np.asarray(tree_data['names'])[idx]
    table = table.sort_order(tip_order, axis='observation')
    table = table.sort_order(selected_samples, axis='sample')

    # Both payloads are wrapped in a JavaScript call (JSONP style).
    with open(os.path.join(data_dir, 'packed_table.jsonp'), 'w') as fh:
        fh.write('LOAD_PACKED_TABLE(')
        fh.write(json.dumps(table_to_b64pa(table)))
        fh.write(');')

    with open(os.path.join(data_dir, 'tree.jsonp'), 'w') as fh:
        fh.write('LOAD_TREE(')
        fh.write(json.dumps(tree_data))
        fh.write(');')
Ejemplo n.º 3
0
def _read_inputs(biom_table: biom.Table,
                 meta_data: NumericMetadataColumn = None):
    """Align the feature table with its per-sample labels.

    Parameters
    ----------
    biom_table : biom.Table
        Feature table; its samples are re-ordered to follow the label
        order.
    meta_data : NumericMetadataColumn, optional
        Per-sample values. When supplied, the labels are the first column
        of the frame returned by ``_sort_metada``. When omitted, every
        sample in the table gets the label ``1.0``.

    Returns
    -------
    tuple
        ``(table, y)`` where ``table`` is the re-ordered table and ``y``
        holds the labels indexed by sample ID.

    Raises
    ------
    ValueError
        If the sample IDs of the re-ordered table do not match the label
        index (same IDs, same order).
    """
    # Explicit None test: the argument is optional, and truthiness of an
    # arbitrary metadata object is not a reliable "was it passed" signal.
    if meta_data is not None:
        meta, biom_table = _sort_metada(meta_data, biom_table)
        y = meta.iloc[:, 0]
        samples = meta.index
    else:
        samples = biom_table.ids('sample')
        # NOTE(review): this branch yields a single-column DataFrame while
        # the metadata branch yields a Series; kept as-is so existing
        # callers see the same type.
        y = pd.DataFrame(data=np.ones(len(samples)), index=samples)

    _table = biom_table.sort_order(axis='sample', order=samples)

    # Compare as plain sequences so that a length mismatch is reported as
    # a mismatch instead of failing inside an elementwise comparison.
    table_samples = _table.ids('sample')
    if list(samples) != list(table_samples):
        difference = set(samples) - set(table_samples)
        # Build one string: passing several positional arguments to
        # ValueError makes the message render as a tuple.
        raise ValueError(
            "The samples IDs in meta data and biom table are not the same! "
            "The difference is: {}. Please double check.".format(difference))

    return _table, y