Esempio n. 1
0
def get_annotations(variants):
    """Fetch DNA variant annotations from the Mission Bio annotations API.

    Parameters
    ----------
    variants : iterable of str
        Variant identifiers. Every ``:`` and ``/`` is replaced by ``-``
        before the ids are sent to the API.

    Returns
    -------
    pandas.DataFrame
        Columns ``Variant``, ``Gene``, ``Function``, ``Protein``,
        ``Coding Impact``, ``ClinVar`` and ``DANN`` (in that order).
        NOTE(review): assumes the API returns exactly one record per
        requested variant, in request order - confirm against the API.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    """
    interface.status('Fetching DNA annotations')
    renamed_variants = np.array(
        [var.replace(':', '-').replace('/', '-') for var in variants],
        dtype='str')

    url = 'https://api.missionbio.io/annotations/v1/variants?ids=' + ','.join(
        renamed_variants)
    r = requests.get(url=url)
    # Surface HTTP failures directly instead of letting an error payload
    # blow up later as an opaque JSON decoding error or KeyError.
    r.raise_for_status()

    data = r.json()
    data = [d['annotations'] for d in data]

    # 'function' and 'clinvar' hold lists of strings; flatten them for display.
    function = [', '.join(d['function']['value']) for d in data]
    gene = [d['gene']['value'] for d in data]
    protein = [d['protein']['value'] for d in data]
    coding_impact = [d['protein_coding_impact']['value'] for d in data]
    clinvar = [', '.join(d['clinvar']['value']) for d in data]

    # Missing impact scores arrive as empty strings; treat them as 0 so the
    # whole column can be converted to float and rounded.
    dann = np.array([d['impact']['value'] for d in data])
    dann[dann == ''] = 0
    dann = np.round(dann.astype(float), 2)

    annot_types = [
        'Gene', 'Function', 'Protein', 'Coding Impact', 'ClinVar', 'DANN'
    ]
    # Build with annotation types as rows, then transpose so that each
    # variant becomes a row.
    df = pd.DataFrame([gene, function, protein, coding_impact, clinvar, dann],
                      index=annot_types).T
    df['Variant'] = variants

    # Put the variant id in the first column.
    df = df[['Variant'] + annot_types]

    return df
Esempio n. 2
0
def run(sample, name, should_save):
    """Copy assay state back to the original assays and optionally save.

    For each of the DNA and protein assays that is present, every metadata
    entry and row attribute is copied onto the corresponding ``_original_*``
    assay. When ``should_save`` is truthy the sample is then written to
    ``h5/analyzed/<name>.h5`` under ``DFT.ROOT`` (replacing an existing file)
    and the interface is asked to rerun.
    """
    working_assays = [sample.dna, sample.protein]
    original_assays = [sample._original_dna, sample._original_protein]

    for working, original in zip(working_assays, original_assays):
        if working is None:
            continue

        for meta_key in working.metadata:
            original.add_metadata(meta_key, working.metadata[meta_key])

        for attr_key in working.row_attrs:
            original.add_row_attr(attr_key, working.row_attrs[attr_key])

    if not should_save:
        return

    interface.status('Saving h5 file.')
    if name == '':
        interface.error('Please provide a name to save by.')
    elif name[-3:] == '.h5':
        # Drop a user-supplied extension; it is appended again below.
        name = name[:-3]

    # Remove a previous file of the same name, if any, before saving.
    try:
        os.remove(DFT.ROOT / f'h5/analyzed/{name}.h5')
    except FileNotFoundError:
        pass

    snapshot = sample[:]
    set_defaults(snapshot)
    mio.save(snapshot, DFT.ROOT / f'h5/analyzed/{name}.h5')

    interface.status('Saved.')
    interface.rerun()
Esempio n. 3
0
def cluster(assay, method_func, description, **kwargs):
    """Run ``method_func`` on ``assay`` unless an identical clustering exists.

    ``similarity`` is consumed from ``kwargs`` (it is not forwarded to
    ``method_func``); when given, ``assay.cluster_cleanup`` is called with it
    after clustering. The clustering is re-run when no description is stored,
    when the stored description differs from ``description``, or when the
    assay is not flagged as clustered. The assay is always flagged as
    clustered on exit.
    """
    similarity = kwargs.pop('similarity', None)

    needs_clustering = (
        DFT.CLUSTER_DESCRIPTION not in assay.metadata
        or assay.metadata[DFT.CLUSTER_DESCRIPTION] != description
        or not assay.metadata[DFT.CLUSTERED])

    if needs_clustering:
        interface.status(f'Clustering {assay.name.replace("_", " ")}')
        method_func(**kwargs)
        if similarity is not None:
            assay.cluster_cleanup(AF_MISSING, similarity)

        assay.add_metadata(DFT.CLUSTER_DESCRIPTION, description)

    assay.add_metadata(DFT.CLUSTERED, True)
Esempio n. 4
0
def download(link):
    """Download an object from S3 into ``h5/downloads`` under ``DFT.ROOT``.

    ``link`` is an ``s3://bucket/key`` style string. The local file is named
    after the last path component of the key. Returns the local path as a
    string; download failures are reported through ``interface.error``.
    """
    interface.status('Downloading from s3.')

    client = boto3.client('s3')

    # First path component is the bucket, the remainder is the object key.
    bucket, _, key = link.replace('s3://', '').partition('/')
    basename = key.rsplit('/', 1)[-1]
    local_path = str(DFT.ROOT / f'h5/downloads/{basename}')

    try:
        client.download_file(bucket, key, local_path)
    except Exception as e:
        interface.status('Done.')
        interface.error(f'Could not find the given h5 file. {e}')

    return local_path
Esempio n. 5
0
def run(sample, name):
    """Save ``sample`` to ``h5/analyzed/<name>.h5`` under ``DFT.ROOT``.

    An empty ``name`` is reported through ``interface.error``; a trailing
    ``.h5`` in ``name`` is stripped. An existing file with the same name is
    removed first. After saving, the interface is asked to rerun.
    """
    interface.status('Saving h5 file.')

    if name == '':
        interface.error('Please provide a name to save by.')
    elif name[-3:] == '.h5':
        # Drop a user-supplied extension; it is appended again below.
        name = name[:-3]

    # Remove a previous file of the same name, if any, before saving.
    try:
        os.remove(DFT.ROOT / f'h5/analyzed/{name}.h5')
    except FileNotFoundError:
        pass

    snapshot = sample[:]
    set_defaults(snapshot)
    mio.save(snapshot, DFT.ROOT / f'h5/analyzed/{name}.h5')

    interface.status('Saved.')
    interface.rerun()
Esempio n. 6
0
def load(path, load_raw, apply_filter):
    """Load a sample from an h5 file and normalise its protein ids.

    When a protein assay is present, each entry of its ``id`` column
    attribute is replaced by its third space-separated token; if any entry
    has fewer than three tokens, ``sample.protein.ids()`` is used instead.
    The same ids are written to the raw protein assay when it exists.
    Returns the sample after ``init_defaults`` has been applied.
    """
    interface.status('Reading h5 file.')

    sample = mio.load(path, apply_filter=apply_filter, raw=load_raw)

    if sample.protein is not None:
        try:
            short_ids = np.array([
                label.split(' ')[2]
                for label in sample.protein.col_attrs['id']
            ])
        except IndexError:
            # At least one id does not have three tokens; keep the ids as-is.
            short_ids = sample.protein.ids()

        sample.protein.add_col_attr('id', short_ids)

        raw_protein = sample.protein_raw
        if raw_protein is not None:
            raw_protein.add_col_attr('id', short_ids)

    init_defaults(sample)

    return sample
Esempio n. 7
0
def preprocess_protein(sample, clicked, drop_abs):
    """(Re)build the protein assay with the requested antibodies dropped.

    Processing happens on initialization, or when ``clicked`` is truthy and
    the set of ids to drop differs from the stored one. The assay is reset,
    the ids in ``drop_abs`` are dropped, and one layer per normalization
    method (``DFT.CLR``, ``DFT.ASINH``, ``DFT.NSP``) is added. Outside of
    initialization the prepped/clustered flags are cleared.
    """
    should_process = sample.protein.metadata[DFT.INITIALIZE] or (
        set(sample.protein.metadata[DFT.DROP_IDS]) != set(drop_abs)
        and clicked)
    if not should_process:
        return

    interface.status('Processing protein assay.')

    sample.reset('protein')

    # Record the full id list before anything is dropped.
    sample.protein.add_metadata(DFT.ALL_IDS, sample.protein.ids())

    if len(drop_abs) > 0:
        trimmed = sample.protein.drop(drop_abs)
    else:
        trimmed = sample.protein[:, :]

    for method in [DFT.CLR, DFT.ASINH, DFT.NSP]:
        trimmed.normalize_reads(method)
        trimmed.add_layer(method, trimmed.layers[NORMALIZED_READS])

    sample.protein = trimmed
    sample.protein.add_metadata(DFT.DROP_IDS, drop_abs)

    if sample.protein.metadata[DFT.INITIALIZE]:
        return

    sample.protein.add_metadata(DFT.PREPPED, False)
    sample.protein.add_metadata(DFT.CLUSTERED, False)
Esempio n. 8
0
def preprocess_dna(sample, clicked, drop_vars, keep_vars, dp, gq, af, std):
    """(Re)build the DNA assay from the filter settings and variant lists.

    Processing happens on initialization, or when ``clicked`` is truthy and
    the filter arguments or the drop/keep lists differ from the stored ones.
    With an empty ``keep_vars`` the variants come from
    ``filter_variants(min_dp=dp, min_gq=gq, min_vaf=af, min_std=std)`` and
    ``drop_vars`` are dropped from the assay; otherwise ``keep_vars`` is used
    directly. An empty variant selection is reported through
    ``interface.error``. Outside of initialization the prepped/clustered
    flags are cleared.
    """
    filter_args = [dp, gq, af, std]

    args_changed = (
        list(sample.dna.metadata[DFT.PREPROCESS_ARGS]) != filter_args
        or set(sample.dna.metadata[DFT.DROP_IDS]) != set(drop_vars)
        or set(sample.dna.metadata[DFT.KEEP_IDS]) != set(keep_vars))

    should_process = sample.dna.metadata[DFT.INITIALIZE] or (args_changed
                                                             and clicked)
    if not should_process:
        return

    interface.status('Processing DNA assay.')

    sample.reset('dna')

    if len(keep_vars) > 0:
        # An explicit keep list bypasses filtering and dropping.
        dna_vars = keep_vars
    else:
        dna_vars = sample.dna.filter_variants(min_dp=dp,
                                              min_gq=gq,
                                              min_vaf=af,
                                              min_std=std)
        sample.dna.add_metadata(DFT.ALL_IDS, sample.dna.ids())
        if len(drop_vars) > 0:
            sample.dna = sample.dna.drop(drop_vars)

    if len(dna_vars) == 0:
        interface.status('Done.')
        interface.error(
            'No variants found. Adjust the filters and process again. Make sure "Filter" is deselected in the Files section.'
        )

    sample.dna = sample.dna[:, dna_vars]
    sample.dna.add_metadata(DFT.PREPROCESS_ARGS, filter_args)
    sample.dna.add_metadata(DFT.DROP_IDS, drop_vars)
    sample.dna.add_metadata(DFT.KEEP_IDS, keep_vars)

    if sample.dna.metadata[DFT.INITIALIZE]:
        return

    sample.dna.add_metadata(DFT.PREPPED, False)
    sample.dna.add_metadata(DFT.CLUSTERED, False)
Esempio n. 9
0
def prepare(assay, scale_attribute, pca_attribute, umap_attribute, pca_comps):
    """Scale the assay and compute PCA and UMAP, skipping up-to-date steps.

    Each step (scale, PCA, UMAP) is re-run when its output is missing, when
    the description of its input stored in the metadata differs from the
    requested one, or when the assay is not flagged as prepped. Outside of
    initialization the clustered flag is cleared; the prepped flag is always
    set on exit.
    """
    interface.status(f'Preparing {assay.name.replace("_", " ")} data.')

    # --- Scaling -----------------------------------------------------------
    scale_label = scale_attribute
    scale_is_stale = (SCALED_LABEL not in assay.layers
                      or assay.metadata[DFT.SCALE_ATTR] != scale_label
                      or not assay.metadata[DFT.PREPPED])
    if scale_is_stale:
        assay.scale_data(scale_attribute)
        assay.add_metadata(DFT.SCALE_ATTR, scale_label)

    # --- PCA ---------------------------------------------------------------
    # The stored label names the underlying layer when PCA runs on scaled data.
    if pca_attribute == SCALED_LABEL:
        pca_label = f'scaled {scale_attribute}'
    else:
        pca_label = f'{pca_attribute}'

    pca_is_stale = (PCA_LABEL not in assay.row_attrs
                    or assay.metadata[DFT.PCA_ATTR] != pca_label
                    or not assay.metadata[DFT.PREPPED])
    if pca_is_stale:
        assay.run_pca(pca_attribute, components=pca_comps)
        assay.add_metadata(DFT.PCA_ATTR, pca_label)

    # --- UMAP --------------------------------------------------------------
    # The PCA case is tested first so that it takes precedence, as before.
    if umap_attribute == PCA_LABEL:
        if pca_attribute == SCALED_LABEL:
            umap_label = f'PCA of scaled {scale_attribute}'
        else:
            umap_label = f'PCA of {pca_attribute}'
    elif umap_attribute == SCALED_LABEL:
        umap_label = f'scaled {scale_attribute}'
    else:
        umap_label = f'{umap_attribute}'

    umap_is_stale = (UMAP_LABEL not in assay.row_attrs
                     or assay.metadata[DFT.UMAP_ATTR] != umap_label
                     or not assay.metadata[DFT.PREPPED])
    if umap_is_stale:
        assay.run_umap(attribute=umap_attribute, random_state=42)
        assay.add_metadata(DFT.UMAP_ATTR, umap_label)

    if not assay.metadata[DFT.INITIALIZE]:
        assay.add_metadata(DFT.CLUSTERED, False)

    assay.add_metadata(DFT.PREPPED, True)
Esempio n. 10
0
def status():
    """Log the raw request body to stdout and return the status as JSON."""
    body = request.get_data()
    # Flush immediately so the line shows up even when stdout is buffered.
    print(body, flush=True)

    current = interface.status()
    return jsonify(status=current)
Esempio n. 11
0
import streamlit as st

import interface
import defaults as DFT
from tasks import (load, preprocess, prepare, cluster, customize, save, visual)

# Streamlit entry script: the statements below run top to bottom, and their
# order is the order of the pipeline stages.

# Page configuration and header.
st.set_page_config(page_title='Mosaic', layout='wide')
interface.init()
interface.subheader('GUI for Mosaic built using Streamlit')
interface.status('v0.1.2')

# Load the sample; also yields the save request and the name to save under.
sample, should_save, save_name = load.run()

# Select/preprocess the assay to work on.
current_assay, available_assays = preprocess.run(sample)

# Analysis stages, followed by the save step (it receives should_save).
prepare.run(current_assay, available_assays)
cluster.run(current_assay, available_assays)
customize.run(current_assay)
save.run(sample, save_name, should_save)

visual.run(sample, current_assay)

# Clear the INITIALIZE flag on every available assay at the end of the run.
for a in available_assays:
    a.add_metadata(DFT.INITIALIZE, False)
Esempio n. 12
0
def make_python_call_string(title, *args, font=subfont):
    """Build one menu line that invokes the Python interpreter.

    The result has the form
    ``<title> | bash=<python> param1="<arg1>" ... terminal=false refresh=true<font>``
    where the positional ``args`` become the numbered ``paramN`` entries and
    ``font`` is appended verbatim at the end.
    """
    # NOTE(review): interpreter path is hard-coded to one user's pyenv shim.
    # TODO just python 3?
    python_loc = "/Users/rpurp/.pyenv/shims/python"

    pieces = ["{} | bash={} ".format(title, python_loc)]
    pieces.extend('param{}="{}" '.format(position, value)
                  for position, value in enumerate(args, 1))
    pieces.append("terminal=false refresh=true")
    pieces.append(font)
    return "".join(pieces)


# Emit the menu to stdout, one entry per line.
# NOTE(review): the `---` separators and `| bash=... paramN=...` entries look
# like the BitBar/xbar plugin format - confirm.
print(interface.status())
print('---')
print('Track' + titlefont)

# One entry per subject; each runs `script -t <subject>`.
for subject in interface.get_subjects():
    print(
        make_python_call_string(subject, script, "-t", subject,
                                font=trackfont))
# Fixed actions, each invoking `script` with a single flag.
print(make_python_call_string("mark", script, "-m"))
print(make_python_call_string("cancel", script, "-c"))
print(make_python_call_string("end", script, "-e"))

print('---')

# Current local time; its use lies beyond the visible part of the script.
now = datetime.datetime.now()
Esempio n. 13
0
import streamlit as st

import interface
from tasks import (load, preprocess, prepare, cluster, customize, save, visual)

# Streamlit entry script: the statements below run top to bottom, and their
# order is the order of the pipeline stages.

# Page configuration and header.
st.set_page_config(page_title='Mosaic', layout='wide')
interface.init()
interface.subheader('GUI for Mosaic built using Streamlit')
interface.status('v0.4.1')

# Load the sample; also yields the save request and the name to save under.
sample, should_save, save_name = load.run()

# Select/preprocess the assay to work on.
current_assay, available_assays = preprocess.run(sample)

prepare.run(current_assay, available_assays)
cluster.run(current_assay, available_assays)

# customize returns the sample/assay pair used for plotting and saving.
sample_kept, current_assay_kept = customize.run(sample, current_assay)

visual_type = visual.run(sample_kept, current_assay_kept)

# Saving uses the customized sample and only happens when requested.
if should_save:
    save.run(sample_kept, save_name)

# Metadata is stored on the uncustomized sample and assay.
save.store_metadata(sample, current_assay, visual_type, available_assays)
Esempio n. 14
0
def render(sample, assay):
    """Draw the visualization picker and the argument widgets for the plot.

    The top row holds a category selectbox followed by one button per plot
    kind in the current category. Changing the category stores the first
    kind of the new category in the assay metadata and triggers a rerun;
    clicking a button stores the chosen kind. Below that, the argument
    widgets for the active kind are drawn in the first column of the plot
    layout.

    Parameters
    ----------
    sample
        Sample whose ``dna`` / ``protein`` / ``protein_raw`` assays feed the
        multi-assay plots.
    assay
        The assay being visualized; its metadata holds the current
        ``[category, kind]`` under ``DFT.VISUAL_TYPE``.

    Returns
    -------
    tuple
        ``(plot_columns, kind, kwargs)``. ``plot_columns`` is the list of
        columns after the first one when ``kind`` has an entry in
        ``DFT.LAYOUT``, otherwise a single column. ``kwargs`` maps argument
        names to the values selected in the widgets.
    """
    interface.status('Creating visuals.')

    category, kind = assay.metadata[DFT.VISUAL_TYPE]
    options = DFT.VISUALS[category][1]
    column_sizes = DFT.VISUALS[category][0]
    columns = st.beta_columns(column_sizes)
    with columns[0]:
        new_category = st.selectbox("", list(DFT.VISUALS.keys()))
        if new_category != category:
            # Switch to the first plot kind of the newly chosen category.
            assay.add_metadata(DFT.VISUAL_TYPE,
                               [new_category, DFT.VISUALS[new_category][1][0]])
            interface.rerun()

    # Column 0 holds the category selectbox, so buttons start at column 1.
    for position, option in enumerate(options, 1):
        with columns[position]:
            # Spacer that vertically aligns the button with the selectbox.
            st.markdown("<p style='margin-bottom:33px'></p>",
                        unsafe_allow_html=True)
            clicked = st.button(option, key=f'visual-{option}')
            if clicked:
                kind = option
                assay.add_metadata(DFT.VISUAL_TYPE, [category, kind])

    if kind in DFT.LAYOUT:
        columns = st.beta_columns(DFT.LAYOUT[kind])
        args_container = columns[0]
        plot_columns = columns[1:]
    else:
        columns = st.beta_columns([0.75, 0.1, 2])
        args_container = columns[0]
        plot_columns = columns[2]

    def select_features():
        # Multiselect over all assay ids, preselecting up to the first four.
        ids = list(assay.ids())
        return st.multiselect('Features', ids, ids[:4])

    with args_container:
        kwargs = {}
        analyte_map = {'protein': 'Protein', 'dna': 'DNA'}

        if kind == DFT.SIGNATURES:
            kwargs['layer'] = st.selectbox('Layer', DFT.LAYERS[assay.name])
            kwargs['attribute'] = st.selectbox(
                'Signature', ['Median', 'Standard deviation', 'p-value'])
        elif kind == DFT.HEATMAP:
            kwargs['attribute'] = st.selectbox('Attribute',
                                               DFT.LAYERS[assay.name],
                                               key='Visualization Attribute')
            kwargs['splitby'] = st.selectbox('Split by',
                                             DFT.SPLITBY[assay.name])
            kwargs['orderby'] = st.selectbox('Order by',
                                             DFT.LAYERS[assay.name],
                                             key='Visualization Orderby')
            kwargs['cluster'] = st.checkbox('Cluster within labels', True)
            kwargs['convolve'] = st.slider('Smoothing', 0, 100)
        elif kind == DFT.SCATTERPLOT:
            kwargs['attribute'] = st.selectbox('Attribute', DFT.ATTRS_2D)
            kwargs['colorby'] = st.selectbox('Color by',
                                             DFT.COLORBY[assay.name])
            # The feature picker is only shown for color-by choices outside
            # the split-by options and 'density'.
            if kwargs['colorby'] not in DFT.SPLITBY[assay.name] + ['density']:
                features = select_features()
                if len(features) != 0:
                    kwargs['features'] = features
        elif kind == DFT.FEATURE_SCATTER:
            kwargs['layer'] = st.selectbox('Layer', DFT.LAYERS[assay.name])
            feature_ids = list(assay.ids())
            # Default the second feature to the third id, clamped so assays
            # with fewer than three features do not get an invalid index.
            second_default = min(2, max(len(feature_ids) - 1, 0))
            feature1 = st.selectbox('Feature 1', feature_ids, index=0)
            feature2 = st.selectbox('Feature 2',
                                    feature_ids,
                                    index=second_default)
            kwargs['ids'] = [feature1, feature2]
            kwargs['colorby'] = st.selectbox('Color by',
                                             DFT.COLORBY[assay.name])
        elif kind == DFT.VIOLINPLOT:
            kwargs['attribute'] = st.selectbox('Attribute',
                                               DFT.LAYERS[assay.name])
            kwargs['splitby'] = st.selectbox('Split by',
                                             DFT.SPLITBY[assay.name])
            kwargs['points'] = st.checkbox('Box and points', False)
            features = select_features()
            if len(features) != 0:
                kwargs['features'] = features
        elif kind == DFT.RIDGEPLOT:
            kwargs['attribute'] = st.selectbox('Attribute',
                                               DFT.LAYERS[assay.name])
            kwargs['splitby'] = st.selectbox('Split by',
                                             DFT.SPLITBY[assay.name])
            features = select_features()
            if len(features) != 0:
                kwargs['features'] = features
        elif kind == DFT.STRIPPLOT:
            kwargs['attribute'] = st.selectbox('Attribute',
                                               DFT.LAYERS[assay.name])
            kwargs['colorby'] = st.selectbox('Colorby', DFT.LAYERS[assay.name])
            features = select_features()
            if len(features) != 0:
                kwargs['features'] = features
        elif kind == DFT.DNA_PROTEIN_PLOT:
            kwargs['analyte'] = st.selectbox(
                'Analyte', ['protein'], format_func=lambda a: analyte_map[a])
            kwargs['dna_features'] = st.multiselect('DNA features',
                                                    list(sample.dna.ids()),
                                                    sample.dna.ids()[:4])
            kwargs['protein_features'] = st.multiselect(
                'Protein features', list(sample.protein.ids()),
                sample.protein.ids()[:4])
        elif kind == DFT.DNA_PROTEIN_HEATMAP:
            kwargs['clusterby'] = st.selectbox(
                'Cluster by', ['dna', 'protein'],
                format_func=lambda a: analyte_map[a])
            kwargs['sortby'] = st.selectbox(
                'Sort by', ['dna', 'protein'],
                format_func=lambda a: analyte_map[a])
            kwargs['dna_features'] = st.multiselect('DNA features',
                                                    list(sample.dna.ids()),
                                                    sample.dna.ids())
            kwargs['protein_features'] = st.multiselect(
                'Protein features', list(sample.protein.ids()),
                sample.protein.ids())

        elif kind == DFT.METRICS:
            st.header('')
            interface.info(
                '<b>Some values might be missing in case the raw<br> files are not loaded.</b> These metrics can be<br> pasted into the metrics sheet as is.'
            )
        elif kind == DFT.READ_DEPTH:
            if assay.name == PROTEIN_ASSAY:
                kwargs['layer'] = st.selectbox('Layer', DFT.LAYERS[assay.name])
                kwargs['colorby'] = st.selectbox('Color by', ['density', None])
                # Unlike the other plots, 'features' is set even when empty.
                kwargs['features'] = select_features()
            else:
                st.header('')
                interface.info('<b>Only applicable for the protein assay</b>')
        elif kind == DFT.ASSAY_SCATTER:
            kwargs['draw'] = sample.protein_raw is not None
            if not kwargs['draw']:
                interface.info('<b>Raw files needed for this plot.</b>')
        elif kind == DFT.DOWNLOAD:
            kwargs['item'] = st.selectbox('Object to Download',
                                          DFT.DOWNLOAD_ITEMS)
            kwargs['download'] = st.button('Download', key='download_button')

    return plot_columns, kind, kwargs
Esempio n. 15
0
def run(sample, assay):
    """Render the visualization controls, draw the plot, and report 'Done.'."""
    rendered = render(sample, assay)
    target_columns, plot_kind, plot_kwargs = rendered
    visual(sample, assay, plot_kind, target_columns, plot_kwargs)

    interface.status('Done.')