# --- Example 1 ---
def get_unique_users():
    """
        Return the number of distinct mapping users.

        Cached weekly
    """
    user_records = query_item('user', projection=['user_email'])
    return len(user_records)
# --- Example 2 ---
 def reports(self):
     """Return this user's traits grouped by report slug, newest first."""
     # Note this requires a composite index defined very precisely.
     rows = query_item('trait',
                       filters=[('user_id', '=', self.user_id)],
                       order=['user_id', '-created_on'])
     # Group report objects by slug after an explicit newest-first sort.
     grouped = defaultdict(list)
     for trait in sorted(rows, key=lambda t: t['created_on'], reverse=True):
         grouped[trait['report_slug']].append(trait)
     return grouped
# --- Example 3 ---
def validate_report_name_unique(form, field):
    """
        WTForms validator: ensure the submitted report name is unique.

        Raises:
            ValidationError: if a trait already uses the slugified report
                name, or if the backend query fails.
    """
    report_slug = slugify(form.report_name.data)
    # Keep the try body minimal: only the backend query can raise BadRequest.
    try:
        existing = query_item('trait',
                              filters=[('report_slug', '=', report_slug)])
    except BadRequest as err:
        # Surface backend failures as a form error; chain the cause for logs.
        raise ValidationError("Backend Error") from err
    if existing:
        raise ValidationError("That report name is not available. Choose a unique report name")
# --- Example 4 ---
def get_latest_public_mappings():
    """
        Return the 5 most recent public, completed mappings.
    """
    traits = list(query_item('trait',
                             filters=[('is_public', '=', True),
                                      ('status', '=', 'complete')],
                             projection=('report_slug', 'trait_name', 'created_on',),
                             limit=5))
    # created_on is stored as a microsecond timestamp; convert in place.
    for entry in traits:
        entry['created_on'] = arrow.get(int(entry['created_on']) / 1e6)
    return traits
# --- Example 5 ---
def report_data(report_slug):
    """Serve a report's raw trait data as a downloadable TSV attachment."""
    # Try the public slug first, then fall back to the secret-hash lookup
    # used for privately shared reports.
    for lookup_field in ('report_slug', 'secret_hash'):
        trait_set = query_item('trait',
                               filters=[(lookup_field, '=', report_slug)])
        try:
            trait = trait_set[0]
        except IndexError:
            continue
        break
    else:
        flash('Cannot find report', 'danger')
        return abort(404)

    return Response(trait['trait_data'],
                    mimetype="text/csv",
                    headers={
                        "Content-disposition":
                        "attachment; filename=%s.tsv" % report_slug
                    })
# --- Example 6 ---
def mapping_interval(report_name, trait_name, peak):
    """
        Return interval variant data for a mapping peak as JSON.

        Args:
            report_name: the report identifier.
            trait_name: the trait within the report.
            peak: peak identifier in URL-safe "CHROM_POS" form.

        Returns:
            JSON mapping of column name -> values for up to 500 variants
            belonging to the 25 most-correlated genes, or a 404 tuple when
            the report/trait pair does not exist.
    """
    try:
        trait = query_item('trait',
                           filters=[('report_trait', '=',
                                     f"{report_name}:{trait_name}")])[0]
    except IndexError:
        # BUG FIX: previously referenced undefined `report_slug`, raising
        # NameError instead of returning this 404 response.
        err = f"Report - Trait not found: {report_name}:{trait_name}"
        logger.error(err)
        return err, 404
    trait = trait_m(trait.key.name)
    interval_summary = trait.get_gs_as_dataset(
        "interval_variants.tsv.gz").fillna("")
    # Peaks arrive URL-safe ("III_123"); the stored form is "III:123".
    interval_summary = interval_summary[interval_summary.peak == peak.replace(
        "_", ":")]
    interval_summary = interval_summary.loc[:,
                                            ("CHROM", "POS", "REF", "ALT",
                                             "impact", "effect", "aa_change",
                                             "gene_name", "gene_id",
                                             "corrected_spearman_cor_p")]
    interval_summary['color'] = interval_summary.impact.apply(
        lambda x: impact_colors[x])
    try:
        interval_summary['name'] = interval_summary.apply(
            lambda x:
            f"{x.gene_name} ({x.gene_id}) - {x.effect}\n{x.aa_change}",
            axis=1)
    except ValueError:
        # Empty frame: DataFrame.apply over zero rows cannot assign a
        # new column, so return an empty payload.
        return jsonify(None)

    # Take top 25 most correlated genes.
    top_genes = list(interval_summary.groupby('gene_id')
                                     .corrected_spearman_cor_p
                                     .max()
                                     .nlargest(25)
                                     .reset_index()
                                     .gene_id.values)

    # Restrict to those genes and cap the payload at 500 variants.
    interval_summary = interval_summary[interval_summary['gene_id'].isin(
        top_genes)][:500]

    out = {
        col: list(interval_summary[col])
        for col in interval_summary.columns.values
    }
    return jsonify(out)
# --- Example 7 ---
def get_mappings_summary():
    """
        Generates the cumulative sum of reports and traits mapped.

        Cached daily
    """
    records = query_item('trait')
    df = pd.DataFrame.from_dict(records)
    # created_on is a microsecond timestamp: strip the final six digits
    # to get seconds, then keep only the ISO calendar date.
    df.created_on = df.apply(
        lambda row: arrow.get(str(row['created_on'])[:-6]).date().isoformat(),
        axis=1)

    traits_per_day = df.groupby('created_on').size().reset_index(name='traits')
    reports_per_day = (df[['report_slug', 'created_on']]
                       .drop_duplicates()
                       .groupby('created_on')
                       .size()
                       .reset_index(name='reports'))
    summary = (pd.merge(reports_per_day, traits_per_day, how='outer')
               .fillna(0)
               .sort_values('created_on'))
    summary.reports = summary.reports.cumsum()
    summary.traits = summary.traits.cumsum()
    return summary
# --- Example 8 ---
def public_mapping():
    """Render the public mappings list, optionally filtered by ?query=."""
    query = request.args.get("query")
    pub_mappings = query_item('mapping', filters=[('is_public', '=', True)])
    # Pass the template context explicitly instead of the fragile
    # **locals() idiom, which leaks any incidental local into the template.
    return render_template('public_mapping.html',
                           query=query,
                           title="Public Mappings",
                           pub_mappings=pub_mappings)
# --- Example 9 ---
def report_view(report_slug, trait_name=None, rerun=None):
    """
        This view will handle logic of handling legacy reports
        and v2 reports.

        Resolves `report_slug` to a trait set (falling back to a
        secret-hash lookup), optionally reruns a mapping, enforces
        access control for private reports, and renders the
        version-specific report template (reports/v1.html or v2.html).
    """

    trait_set = query_item('trait',
                           filters=[('report_slug', '=', report_slug)])

    # Get first report if available.
    try:
        trait = trait_set[0]
    except IndexError:
        # No slug match: the URL may carry a secret hash for a
        # privately-shared report instead.
        try:
            trait_set = query_item('trait',
                                   filters=[('secret_hash', '=', report_slug)])
            trait = trait_set[0]
        except IndexError:
            flash('Cannot find report', 'danger')
            return abort(404)

    # Enable reruns
    if rerun:
        # Delete the existing trait and mapping records for this trait
        # before resubmitting the mapping task.
        trait_set = [x for x in trait_set if x['trait_name'] == trait_name]
        for n, existing_trait in enumerate(trait_set):
            logger.info(n)
            logger.info(existing_trait.key)
            delete_item(existing_trait)
        # NOTE(review): trait_m is called below with a key name
        # (cur_trait.key.name) but here with the entity itself — confirm
        # trait_m accepts both forms.
        trait = trait_m(trait_set[0])

        mapping_items = query_item('mapping',
                                   filters=[('report_slug', '=', report_slug),
                                            ('trait_slug', '=', trait_name)])
        for existing_mapping in mapping_items:
            delete_item(existing_mapping)

        trait.status = "Rerunning"
        # Running the task will save it.
        trait.run_task()
        return redirect(
            url_for('mapping.report_view',
                    report_slug=report_slug,
                    trait_name=trait_name))

    # Verify user has permission to view report
    user = session.get('user')
    if not trait.get('is_public'):
        if user:
            user_id = user.get('user_id')
        else:
            user_id = None
        # Private reports are visible only via their secret hash or to
        # the owning user.
        if trait['secret_hash'] != report_slug and user_id != trait['user_id']:
            flash('You do not have access to that report', 'danger')
            return abort(404)

    if not trait_name:
        logger.error("Trait name not found")
        # Redirect to the first trait
        return redirect(
            url_for('mapping.report_view',
                    report_slug=report_slug,
                    trait_name=trait_set[0]['trait_name']))

    try:
        # Resolve REPORT --> TRAIT
        # Fetch trait and convert to trait object.
        cur_trait = [x for x in trait_set if x['trait_name'] == trait_name][0]
        trait = trait_m(cur_trait.key.name)
        trait.__dict__.update(cur_trait)
        logger.info(trait)
    except IndexError:
        return abort(404)

    # Base template context; version-specific branches below add to it.
    VARS = {
        'title': trait.report_name,
        'subtitle': trait_name,
        'trait_name': trait_name,
        'report_slug': report_slug,
        'trait': trait,
        'trait_set': trait_set,
        'BIOTYPES': BIOTYPES,
        'TABLE_COLORS': TABLE_COLORS,
        'n_peaks': 0
    }

    # Set status to error if the container is stopped and status is not set to complete.
    if trait.container_status() == 'STOPPED' and trait.status != "complete":
        trait.status = 'error'
        trait.save()

    if trait.status == 'complete':
        if trait.REPORT_VERSION == 'v1':
            """
                VERSION 1
            """
            # Legacy layout: columns are positional (1 = isotype,
            # 3 = phenotype value) — presumably fixed by the v1 pipeline.
            phenotype_data = trait.get_gs_as_dataset("tables/phenotype.tsv")
            isotypes = list(phenotype_data.iloc[:, 1].dropna().values)
            phenotype_data = list(phenotype_data.iloc[:, 3].values)
            VARS.update({
                'phenotype_data': phenotype_data,
                'isotypes': isotypes
            })
            if trait.is_significant:
                interval_summary = trait.get_gs_as_dataset("tables/interval_summary.tsv.gz") \
                                        .rename(index=str, columns={'gene_w_variants': 'genes w/ variants'})
                try:
                    variant_correlation = trait.get_gs_as_dataset(
                        "tables/variant_correlation.tsv.gz")
                    # Strongest absolute correlation per (gene, interval),
                    # used to rank variants in the template.
                    max_corr = variant_correlation.groupby(
                        ['gene_id',
                         'interval']).apply(lambda x: max(abs(x.correlation)))
                    max_corr = max_corr.reset_index().rename(
                        index=str, columns={0: 'max_correlation'})
                    variant_correlation = pd.merge(variant_correlation, max_corr, on=['gene_id', 'interval']) \
                                            .sort_values(['max_correlation', 'gene_id'], ascending=False)
                except (urllib.error.HTTPError, pd.errors.EmptyDataError):
                    # Missing or empty correlation file: render without it.
                    variant_correlation = []
                peak_summary = trait.get_gs_as_dataset(
                    "tables/peak_summary.tsv.gz")
                peak_summary['interval'] = peak_summary.apply(
                    lambda row:
                    f"{row.chrom}:{row.interval_start}-{row.interval_end}",
                    axis=1)
                first_peak = peak_summary.iloc[0]
                VARS.update({
                    'peak_summary': peak_summary,
                    'first_peak': first_peak,
                    'n_peaks': len(peak_summary),
                    'variant_correlation': variant_correlation,
                    'interval_summary': interval_summary
                })

        elif trait.REPORT_VERSION == "v2":
            """
                VERSION 2
            """
            # If the mapping is complete:
            # Phenotype plot

            phenotype_plot = plotly_distplot(trait._trait_df, trait_name)
            VARS.update({'phenotype_plot': phenotype_plot})
            # Fetch datafiles for complete runs
            VARS.update({'n_peaks': 0})
            if trait.is_significant:
                peak_summary = trait.get_gs_as_dataset("peak_summary.tsv.gz")
                try:
                    # Parse "CHROM:start-end" out of the first peak's
                    # interval string.
                    # NOTE(review): "\-" in this pattern is an unnecessary
                    # escape (DeprecationWarning on newer Pythons);
                    # r":|-" would be equivalent.
                    first_peak = peak_summary.loc[0]
                    chrom, interval_start, interval_end = re.split(
                        ":|\-", first_peak['interval'])
                    first_peak.chrom = chrom
                    first_peak.pos = int(first_peak['peak_pos'].split(":")[1])
                    first_peak.interval_start = int(interval_start)
                    first_peak.interval_end = int(interval_end)
                except:
                    # NOTE(review): bare except silently falls back to no
                    # peak; narrow to the expected exceptions
                    # (KeyError/IndexError/ValueError) to avoid hiding bugs.
                    first_peak = None

                try:
                    variant_correlation = trait.get_gs_as_dataset(
                        "interval_variants.tsv.gz")
                except (pd.errors.EmptyDataError):
                    variant_correlation = pd.DataFrame()

                interval_summary = trait.get_gs_as_dataset("interval_summary.tsv.gz") \
                                        .rename(index=str, columns={'gene_w_variants': 'genes w/ variants'})

                peak_marker_data = trait.get_gs_as_dataset(
                    "peak_markers.tsv.gz")
                # NOTE(review): peak_summary is re-fetched here although it
                # was loaded above — confirm whether the second fetch is
                # intentional (e.g. the first may have been mutated).
                peak_summary = trait.get_gs_as_dataset("peak_summary.tsv.gz")
                VARS.update({
                    'pxg_plot': pxg_plot(peak_marker_data, trait_name),
                    'interval_summary': interval_summary,
                    'variant_correlation': variant_correlation,
                    'peak_summary': peak_summary,
                    'n_peaks': len(peak_summary),
                    'isotypes': list(trait._trait_df.ISOTYPE.values),
                    'first_peak': first_peak
                })

            # To handle report data, functions specific
            # to the version will be required.

    report_template = f"reports/{trait.REPORT_VERSION}.html"
    return render_template(report_template, **VARS)