Beispiel #1
0
    def post_collection(self, name, analysis_ids):
        """Write the processed data to ``processed.h5`` and upload it as a new collection.

        File attributes of the parent collections are merged as follows:

        * with a single parent, its file attributes are used as-is;
        * with several parents, an attribute defined by exactly one parent is
          kept as a file attribute, while an attribute defined by two or more
          parents becomes a per-row column of the label dataframe.

        Note that ``label_df`` is the same object as ``self._label_df``, so any
        column added here is added to the instance's label dataframe in place.

        :param name: name given to the new collection.
        :param analysis_ids: stored under ``'analysis_ids'`` in the upload metadata.
        :return: whatever ``upload_collection`` returns for the new collection.
        """
        self.load_dataframes()
        parent_collections = [
            get_collection(current_user, collection_id) for collection_id in self.loaded_collection_ids
        ]
        label_df = self._label_df

        if len(parent_collections) > 1:
            # Number of label rows contributed by each parent, in load order.
            collection_lengths = [
                len(label_df[label_df.original_collection_id == collection_id])
                for collection_id in self.loaded_collection_ids
            ]
            collection_attrs = [collection.get_file_attributes() for collection in parent_collections]
            all_attr_keys = set().union(*(collection_attr.keys() for collection_attr in collection_attrs))
            attrs = {}
            for attr_key in all_attr_keys:
                # One entry per parent (None where the parent lacks the attribute),
                # so positions stay aligned with ``collection_lengths``.
                per_collection_values = [
                    collection_attr[attr_key] if attr_key in collection_attr else None
                    for collection_attr in collection_attrs
                ]
                valid_values = [value for value in per_collection_values if value is not None]
                if len(valid_values) == 1:
                    attrs[attr_key] = valid_values[0]
                else:
                    # Repeat each parent's value once per row it contributed.
                    # Pairing the *unfiltered* list with the lengths keeps every
                    # value with its own collection and makes the column length
                    # equal to the sum of ``collection_lengths``.
                    # NOTE(review): assumes label rows are grouped by collection
                    # in ``loaded_collection_ids`` order - confirm.
                    label_df[attr_key] = [
                        value
                        for value, length in zip(per_collection_values, collection_lengths)
                        for _ in range(length)
                    ]
        else:
            # Reuse the collection fetched above rather than looking it up again.
            attrs = parent_collections[0].get_file_attributes()

        filename = os.path.join(self.root_dir, 'processed.h5')
        attrs['processing_log'] = self.processing_log
        description = '\n\n'.join([collection.description for collection in parent_collections] + [self.processing_log])

        # A permission is granted on the new collection only if every parent grants it.
        new_data = {
            'name': name,
            'description': description,
            'analysis_ids': analysis_ids,
            'parent_collection_id': self.loaded_collection_ids[0],
            'group_can_read': all(collection.group_can_read for collection in parent_collections),
            'all_can_read': all(collection.all_can_read for collection in parent_collections),
            'group_can_write': all(collection.group_can_write for collection in parent_collections),
            'all_can_write': all(collection.all_can_write for collection in parent_collections)
        }

        self.write_collection(self._numeric_df, label_df, attrs, filename)
        return upload_collection(current_user, filename, new_data)
Beispiel #2
0
 def results_filename(self):
     """Return the filename of the results collection, or ``None``.

     ``None`` is returned when ``results_collection_id`` is not set, or when
     the collection lookup fails (the lookup is deliberately best-effort).
     """
     if self.results_collection_id is None:
         return None
     try:
         return get_collection(current_user,
                               self.results_collection_id).filename
     except Exception:
         # Best-effort: any lookup failure means "no results file". Catching
         # Exception rather than using a bare ``except`` lets
         # KeyboardInterrupt and SystemExit propagate.
         return None
Beispiel #3
0
 def get_collections(self, collection_ids):
     """Load the collections via the parent class, then derive plot state.

     Sets ``processing_log`` from the ``'processing_log'`` attribute of the
     first collection (falling back to an empty string if that fails), and
     computes ``x_axis_range`` / ``y_axis_range`` from the numeric dataframe.

     :param collection_ids: ids forwarded to the parent implementation; the
         first one is used to look up the processing log.
     """
     super().get_collections(collection_ids)
     try:
         first_collection = get_collection(current_user, collection_ids[0])
         self.processing_log = first_collection.get_attr('processing_log')
     except Exception as error:
         print(error)
         self.processing_log = ''
     # Column labels are parsed as floats; the x range is stored as
     # [largest, smallest], i.e. in descending order.
     column_positions = list(map(float, self._numeric_df.columns))
     self.x_axis_range = [max(column_positions), min(column_positions)]
     # Pad the y range by 5% of the maximum value on either side.
     peak = np.max(self._numeric_df.values)
     self.y_axis_range = [-0.05*peak, 1.05 * peak]
Beispiel #4
0
def render_collection(collection_id=None):
    """Handle a request for a single collection page.

    * ``DELETE`` deletes the collection and redirects to the collection list.
    * ``POST`` creates a new label dataset from the ``dataset_name`` and
      ``dtype`` form fields, then falls through to rendering the page.
    * Any other method just renders the collection entry page.

    Every exception is routed through ``handle_exception_browser``.

    :param collection_id: id of the collection to show or modify.
    :return: a redirect, the rendered template, or the error response.
    """
    try:
        current_user = get_current_user()
        collection = get_collection(current_user, collection_id)
        if request.method == 'DELETE':
            delete_collection(current_user, collection_id)
            list_url = url_for('collections.render_collection_list')
            return redirect(list_url)
        elif request.method == 'POST':
            new_name = request.form.get('dataset_name')
            new_dtype = request.form.get('dtype')
            dt.collections.create_new_label_dataset(
                current_user, collection, new_name, new_dtype)
        page_data = CollectionPageData(current_user, collection)
        return render_template('pages/collection_entry.html',
                               page_data=page_data)
    except Exception as e:
        return handle_exception_browser(e)
Beispiel #5
0
 def _save_loadings(group_key, name):
     """Write the loadings table of one results group to ``name``.

     The table has the columns ``x_loadings``, ``p_value`` and one
     ``P_ortho[i]`` column per column of the group's ``opls/P_ortho``
     dataset. If the results file records an ``input_collection_id`` and
     that collection can be read, its ``x`` dataset becomes the index and
     its ``x_min`` / ``x_max`` datasets are added as leading columns,
     provided their lengths match the table.

     ``self`` and ``file_format`` are read from the enclosing scope. The
     table is only written when ``file_format == 'csv'``.

     :param group_key: key of the group inside the results file.
     :param name: output path passed to ``DataFrame.to_csv``.
     """
     with h5py.File(self.results_filename, 'r') as results_file:
         group = results_file[group_key]
         values = [
             np.array(group['pls']['x_loadings']),
             np.array(group['feature_p_values']),
             np.array(group['opls']['P_ortho'])
         ]
         columns = ['x_loadings', 'p_value'] + [
             f'P_ortho[{i}]' for i in range(values[2].shape[1])
         ]
         input_collection_id = results_file.attrs[
             'input_collection_id'] if 'input_collection_id' in results_file.attrs else None

     # Axis information from the input collection is optional: every failure
     # below degrades to "not available" instead of aborting the export.
     x = x_min = x_max = None
     if input_collection_id is not None:
         try:
             input_collection = get_collection(current_user,
                                               input_collection_id)
             x = input_collection.get_dataset('x')
         except Exception:
             x = None
         else:
             try:
                 x_min = input_collection.get_dataset('x_min')
                 x_max = input_collection.get_dataset('x_max')
             except Exception:
                 # Bin boundaries are only used as a pair.
                 x_min = x_max = None

     df = pd.DataFrame(np.column_stack(values), columns=columns)
     if x is not None and len(x) == len(df):
         df.index = x
         df.index.name = 'x'
     # Both boundaries must match the table length; a mismatched x_max would
     # otherwise make the column assignment raise.
     if (x_min is not None and x_max is not None
             and len(x_min) == len(x_max) == len(df)):
         df['x_min'] = x_min
         df['x_max'] = x_max
         df = df[['x_min', 'x_max'] + columns]
     if file_format == 'csv':
         df.to_csv(name)
Beispiel #6
0
    def get_plot(self, queries, group_by, labels, theme, bin_collection_id, legend_style, background_color):
        """Build the overlaid line plot of the loaded rows, one colour per query.

        :param queries: ``DataFrame.query`` strings evaluated against the label
            dataframe; each one selects a group of rows drawn in one colour.
        :param group_by: names joined with ``','`` to form the legend heading.
        :param labels: label columns used to name individual traces; when empty
            or ``None`` the row index is used instead.
        :param theme: plotly template name; ``'plotly_dark'`` selects the dark
            axis line colours unless the background is opaque white.
        :param bin_collection_id: optional id of a collection whose ``x_min`` /
            ``x_max`` datasets are drawn as alternating background rectangles.
        :param legend_style: ``'full'`` (group key plus one entry per trace),
            ``'groups'`` (group key only); any other value hides the legend.
        :param background_color: used for both plot and paper background.
        :return: the assembled ``go.Figure``.
        """
        labels = labels or []
        self.load_dataframes()

        # Optional background rectangles, alternating between the first two
        # default colours, spanning the full plot height.
        shapes = []
        if bin_collection_id is not None:
            bin_collection = get_collection(current_user, bin_collection_id)
            x_mins = bin_collection.get_dataset('x_min').ravel().tolist()
            x_maxes = bin_collection.get_dataset('x_max').ravel().tolist()
            shapes = [
                go.layout.Shape(
                    type='rect',
                    xref='x',
                    yref='paper',
                    x0=x_min,
                    y0=0,
                    x1=x_max,
                    y1=1,
                    fillcolor=DEFAULT_PLOTLY_COLORS[i % 2],
                    opacity=0.2,
                    layer='below',
                    line_width=0
                )
                for i, (x_min, x_max) in enumerate(zip(x_mins, x_maxes))
            ]

        axis_line_style = {
            'zerolinecolor': '#375A7F',  # darkly primary
            'gridcolor': '#444444'  # darkly secondary
        } if theme == 'plotly_dark' and background_color != 'rgba(255,255,255,1)' else {
            'zerolinecolor': '#2C3E50',  # flatly primary
            'gridcolor': '#95A5A6'  # flatly secondary
        }

        show_group_legend = legend_style in ('full', 'groups')
        layout_kwargs = {
            'height': 700,
            'font': {'size': 16},
            'margin': {'t': 25, 'l': 25, 'b': 25, 'r': 25},
            'template': theme,
            'plot_bgcolor': background_color,
            'paper_bgcolor': background_color,
            'xaxis': {
                'title': 'Chemical Shift (ppm)',
                'autorange': 'reversed',
                **axis_line_style
            },
            'yaxis': {
                'title': 'Intensity',
                **axis_line_style
            },
            'shapes': shapes
        }
        if not show_group_legend:
            # The legend is only switched off explicitly; otherwise the
            # template default applies.
            layout_kwargs['showlegend'] = False
        layout = go.Layout(**layout_kwargs)

        # Evaluate every query once and reuse the result for colour assignment
        # and for selecting the rows to draw.
        group_indices = [self._label_df.query(query).index for query in queries]
        n_default_colors = len(DEFAULT_PLOTLY_COLORS)
        colors = [DEFAULT_PLOTLY_COLORS[i % n_default_colors] for i in range(len(group_indices))]
        # Legend text for a group: the double-quoted words found in its query.
        group_names = [','.join(re.findall(r'["](\w+)["]', query)) for query in queries]  # pretty kludgy

        x = self._numeric_df.columns.values.astype(float)
        figure = go.Figure(layout=layout)

        def _legend_placeholder(name):
            """Invisible marker trace that only contributes a row to the legend."""
            return go.Scatter(
                x=[0],
                y=[0],
                name=name,
                mode='markers',
                marker={
                    'opacity': 0,
                    'size': 0,
                    'color': 'rgba(0,0,0,0)'
                }
            )

        if show_group_legend:
            # Stand-in for a legend title.
            figure.add_trace(_legend_placeholder(','.join(group_by)))
            for query, color, group_name in zip(queries, colors, group_names):
                figure.add_trace(
                    go.Scatter(  # dummy series to label colors
                        x=[0],
                        y=[0],
                        name=group_name,
                        mode='lines',
                        marker={'color': color},
                        legendgroup=query
                    )
                )
            # Blank row separating the colour key from what follows.
            figure.add_trace(_legend_placeholder(''))

        if legend_style == 'full':
            # Stand-in heading for the per-trace legend entries.
            figure.add_trace(
                _legend_placeholder(f"({', '.join(labels)})" if labels else 'Spectrum #')
            )

        for query, color, indices, group_name in zip(queries, colors, group_indices, group_names):
            for i, row in self._numeric_df.loc[indices].iterrows():
                label_row = self._label_df.loc[i]
                text = '<br>'.join([f'{label}=={label_row[label]}' for label in self._label_df.columns])
                if legend_style == 'groups':
                    trace = go.Scatter(
                        x=x,
                        y=row,
                        text=text,
                        name=group_name,
                        mode='lines',
                        marker={'color': color, 'size': 1},
                        legendgroup=query,
                        showlegend=False
                    )
                else:
                    if labels:
                        name = f"({', '.join([f'{label_row[label]}' for label in labels])})"
                    else:
                        name = f'({i})'
                    trace = go.Scatter(
                        x=x,
                        y=row,
                        text=text,
                        name=name,
                        mode='lines',
                        marker={'color': color, 'size': 2},
                        showlegend=(legend_style == 'full')
                    )
                figure.add_trace(trace)

        return figure
Beispiel #7
0
    def get_loading_significance_table(self, group_key, theme=None, wrap=True):
        """Build the Dash table of loadings and p-values for one results group.

        Rows are sorted with significant features (``p < outer_alpha``) first,
        then by bin. When the input collection recorded in the results file
        provides ``x_min`` / ``x_max`` datasets matching the feature labels,
        ``Bin Max`` and ``Bin Min`` columns are included.

        :param group_key: key of the group inside the results file.
        :param theme: passed to ``self._get_table_styles``.
        :param wrap: if true, return a ``html.Div`` holding the group's
            description as a heading above the table; otherwise the table alone.
        :return: the ``Div`` or ``DataTable``; if the results file is not
            ready, a one-element list holding an empty ``DataTable``.
        """
        if not self.results_file_ready:
            # Do not touch the results file at all until it is ready.
            return [dash_table.DataTable(id='feature-table')]

        _theme, style_header, style_cell = self._get_table_styles(theme)
        # Read everything, including the description, inside a single
        # read-only context so the file handle is always closed.
        with h5py.File(self.results_filename, 'r') as file:
            group = file[group_key]
            description = group.attrs['description']
            feature_labels = np.array(group['feature_labels'])
            loadings = np.array(group['pls']['x_loadings']).ravel()
            p_values = np.array(group['feature_p_values'])
            alpha = group.attrs['outer_alpha']
            base_collection_id = file.attrs[
                'input_collection_id'] if 'input_collection_id' in file.attrs else None

        # Bin boundaries are optional; any failure simply omits them.
        x_min = x_max = None
        if base_collection_id is not None:
            try:
                base_collection = get_collection(current_user,
                                                 int(base_collection_id))
                x = base_collection.get_dataset('/x').ravel()
                # Keep only the bins whose x value is one of the features.
                is_feature = np.isin(x, feature_labels)
                x_min = base_collection.get_dataset('/x_min').ravel()[is_feature]
                x_max = base_collection.get_dataset('/x_max').ravel()[is_feature]
            except Exception:
                x_min = x_max = None
        valid_bin_boundaries = (x_min is not None and x_max is not None
                                and x_max.shape[0] == x_min.shape[0] ==
                                feature_labels.shape[0])

        is_significant = p_values < alpha
        df = pd.DataFrame()
        df['Bin'] = feature_labels
        df['Loading'] = loadings
        if valid_bin_boundaries:
            df['Bin Max'] = x_max
            df['Bin Min'] = x_min
        df['p Value'] = p_values
        # Temporary sort key: '*' sorts after '' so descending order puts
        # significant rows first.
        df['Significant'] = ['*' if s else '' for s in is_significant]
        df = df.sort_values(['Significant', 'Bin'],
                            ascending=[False, True])
        del df['Significant']

        # format table for better display in browser
        df['p Value'] = df['p Value'].round(7).apply(
            lambda val: f'{val:.7f}')
        df['Bin'] = df['Bin'].round(4).apply(lambda val: f'{val:.4f}')
        df['Loading'] = df['Loading'].round(5).apply(
            lambda val: f'{val:.5f}')
        if valid_bin_boundaries:
            df['Bin Max'] = df['Bin Max'].round(4).apply(
                lambda val: f'{val:.4f}')
            df['Bin Min'] = df['Bin Min'].round(4).apply(
                lambda val: f'{val:.4f}')
        df['Index'] = [str(i) for i in df.index]
        if valid_bin_boundaries:
            df = df[[
                'Index', 'Bin Max', 'Bin', 'Bin Min', 'Loading', 'p Value'
            ]]
        else:
            df = df[['Index', 'Bin', 'Loading', 'p Value']]

        style_data_conditional = [{
            'if': {
                'filter_query': f'{{p Value}} < {alpha}'
            },
            'backgroundColor': '#D2F9F1'
        }, {
            'if': {
                'filter_query': f'{{p Value}} > {alpha}'
            },
            'backgroundColor': '#F9D9D2'
        }]
        # The bin boundary columns share the width of the 'Bin' column.
        bin_width = f'{df.Bin.str.len().max() + 2}ch'
        table = dash_table.DataTable(
            id='feature-table',
            columns=[{
                'name': i,
                'id': i
            } for i in df.columns],
            data=df.to_dict('records'),
            style_table={
                'height': '500px',
                'overflowY': 'scroll'
            },
            fixed_rows={
                'headers': True,
                'data': 0
            },
            is_focused=True,
            style_data_conditional=style_data_conditional,
            style_header=style_header,
            style_cell=style_cell,
            style_cell_conditional=[
                {
                    'if': {
                        'column_id': 'Index'
                    },
                    'width': f'{max(df.Index.str.len().max(), 5) + 2}ch'
                },
                {
                    'if': {
                        'column_id': 'Bin'
                    },
                    'width': bin_width
                },
                {
                    'if': {
                        'column_id': 'Bin Max'
                    },
                    'width': bin_width
                },
                {
                    'if': {
                        'column_id': 'Bin Min'
                    },
                    'width': bin_width
                },
                {
                    'if': {
                        'column_id': 'p Value'
                    },
                    'width': '12ch'
                },
            ])
        if wrap:
            return html.Div(
                [dbc.Row(html.H4(description)),
                 dbc.Row(table)])
        return table