Example #1
0
def cuffnorm(resource, annotation, use_ercc=None):
    """Run Cuffnorm_ for selected cuffquats.

    This method runs `Cuffnorm`_ process on ``resource`` with
    ``annotation`` and ``use_ercc`` parameters specified in arguments.

    .. _Cuffnorm:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-upload-expression-cuffnorm

    :param resource: resource on which cuffnorm will be run
    :param annotation: annotation object used in cuffnorm
    :type annotation: `~resdk.resources.data.Data`
    :param bool use_ercc: use ERRCC spike-in controls for normalization

    """
    relation_filter = {}
    collection_id = get_resource_collection(resource)
    if collection_id:
        relation_filter['collection'] = collection_id

    samples = get_samples(resource)

    input_objects = [annotation]
    input_objects.extend(samples)
    resolwe = get_resolwe(*input_objects)

    cuffquants = [get_data_id(sample.get_cuffquant()) for sample in samples]

    inputs = {
        'cuffquant': cuffquants,
        'annotation': get_data_id(annotation),
    }

    if use_ercc is not None:
        inputs['useERCC'] = use_ercc

    cuffnorm_obj = resolwe.get_or_run(slug='cuffnorm', input=inputs)

    if is_collection(resource):
        resource.add_data(cuffnorm_obj)
    elif is_relation(resource):
        resource.collection.add_data(cuffnorm_obj)

    return cuffnorm_obj
Example #2
0
def cuffnorm(resource, annotation, use_ercc=None):
    """Run Cuffnorm_ for selected cuffquats.

    This method runs `Cuffnorm`_ process on ``resource`` with
    ``annotation`` and ``use_ercc`` parameters specified in arguments.

    .. _Cuffnorm:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-upload-expression-cuffnorm

    :param resource: resource on which cuffnorm will be run
    :param annotation: annotation object used in cuffnorm
    :type annotation: `~resdk.resources.data.Data`
    :param bool use_ercc: use ERRCC spike-in controls for normalization

    """
    relation_filter = {}
    collection_id = get_resource_collection(resource)
    if collection_id:
        relation_filter['collection'] = collection_id

    samples = get_samples(resource)

    input_objects = [annotation]
    input_objects.extend(samples)
    resolwe = get_resolwe(*input_objects)

    cuffquants = [get_data_id(sample.get_cuffquant()) for sample in samples]

    inputs = {
        'cuffquant': cuffquants,
        'annotation': get_data_id(annotation),
    }

    if use_ercc is not None:
        inputs['useERCC'] = use_ercc

    cuffnorm_obj = resolwe.get_or_run(slug='cuffnorm', input=inputs)

    if is_collection(resource):
        resource.add_data(cuffnorm_obj)
    elif is_relation(resource):
        resource.collection.add_data(cuffnorm_obj)

    return cuffnorm_obj
Example #3
0
                control.append(get_data_id(sample.get_cuffquant()))
            else:
                raise ValueError(
                    "Position different from 'case' or 'control' was found in the "
                    "following relation: {}".format(relation.id))

        if not case or not control:
            continue

        inputs['case'] = case
        inputs['control'] = control

        cuffdiff_obj = resolwe.get_or_run(slug='cuffdiff', input=inputs)
        cuffdiff_objects.append(cuffdiff_obj)

        if is_collection(resource):
            resource.add_data(cuffdiff_obj)
        elif is_relation(resource):
            resource.collection.add_data(cuffdiff_obj)

    if not cuffdiff_objects:
        if not relations:
            raise ValueError(
                "No relation containing all of the given samples was found")
        else:
            raise ValueError(
                "No suitable relation was found (given samples all have either 'case' position "
                "or 'control' position")

    return cuffdiff_objects
Example #4
0
def cuffnorm(resource, annotation, use_ercc=None, threads=None):
    """Run Cuffnorm_ for selected cuffquats.

    This method runs `Cuffnorm`_ process on ``resource`` with
     ``annotation``, ``useERCC`` and ``threads`` parameters specified
     in arguments.

    .. _Cuffnorm:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-upload-expression-cuffnorm

    :param resource: resource on which cuffnorm will be run
    :param annotation: annotation object used in cuffnorm
    :type annotation: int or `~resdk.resources.data.Data`
    :param bool useERCC: use ERRCC spike-in controls for normalization
    :param int threads: use this many threads to align reads
        (default: ``1``)

    """
    relation_filter = {}
    collection_id = get_resource_collection(resource)
    if collection_id:
        relation_filter['collection'] = collection_id

    samples = get_samples(resource)

    input_objects = [annotation]
    input_objects.extend(samples)
    resolwe = get_resolwe(*input_objects)

    cuffquants = [get_data_id(sample.get_cuffquant()) for sample in samples]

    labels = []
    replicates = []
    replicates_ids = {}
    for sample in samples:
        relations = resolwe.relation.filter(type='group',
                                            label='replicates',
                                            entity=[sample.id],
                                            **relation_filter)

        if len(relations) == 1:
            relation = relations[0]
        else:
            raise LookupError(
                "Cannot determine unique group relation with label `replicates` for the "
                "following sample: {}".format(sample.name))

        if relation.id not in replicates_ids:
            replicates_ids[relation.id] = str(len(replicates_ids))
        replicates.append(replicates_ids[relation.id])

        if str(relation.id) not in labels:
            labels.append(str(relation.id))

    inputs = {
        'cuffquant': cuffquants,
        'replicates': replicates,
        'annotation': get_data_id(annotation),
        'labels': labels,
    }

    if use_ercc is not None:
        inputs['useERCC'] = use_ercc

    if threads is not None:
        inputs['threads'] = threads

    cuffnorm_obj = resolwe.get_or_run(slug='cuffnorm', input=inputs)

    if is_collection(resource):
        resource.add_data(cuffnorm_obj)
    elif is_relation(resource):
        resource.collection.add_data(cuffnorm_obj)

    return cuffnorm_obj
Example #5
0
def bamplot(resource,
            genome,
            input_gff=None,
            input_region=None,
            stretch_input=None,
            color=None,
            sense=None,
            extension=None,
            rpm=None,
            yscale=None,
            names=None,
            plot=None,
            title=None,
            scale=None,
            bed=None,
            multi_page=None):
    """Run ``bamplot`` on the resource.

    This method runs `bamplot`_ with bams, genome and gff or region
    specified in arguments.

    .. _bamplot:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-bamplot

    :param list resource: resource from which bam objects will be get
    :param str genome: Genome used in the process (options are HG18,
        HG19, MM9 and MM10)
    :param input_gff: id of annotation file is given
    :type input_gff: int or `~resdk.resources.data.Data`
    :param str input_region: enter a genomic region
    :param int stretch_input: stretch the input regions to a minimum
        length
    :param str color: enter a colon separated list of colors
    :param str sense: map to forward, reverse or'both strand,
        default maps to ``both``
    :param int extension: extends reads by n bp, dfault value is 200bp
    :param bool rpm: normalizes density to reads per million (rpm),
        default is ``False``
    :param str yscale: choose either relative or uniform y axis scaling,
        default is ``relative scaling``
    :param str names: a comma separated list of names for your bams
    :param str plot: choose all lines on a single plot or multiple plots
    :param str title: title for the output plot(s), default will be the
        coordinate region
    :param str scale: a comma separated list of multiplicative scaling
        factors for your bams, default is ``None``
    :param list beds: subset of bed files to run process on, if empty
        processes for all bed files will be run
    :param bool multi_page: if flagged will create a new pdf for each
        region

    """
    input_objects = []

    if not input_gff and not input_region:
        raise KeyError('Please specify `input_gff` or `input_region.')
    if input_gff and input_region:
        raise KeyError('Please specify `input_gff` or `input_region.')

    valid_genomes = ['HG18', 'HG19', 'MM8', 'MM9', 'MM10']
    if genome not in valid_genomes:
        raise KeyError('Invalid `genome`, please use one of the following: '
                       '{}'.format(', '.join(valid_genomes)))

    bams = [sample.get_bam() for sample in get_samples(resource)]
    input_objects.extend(bams)
    bams = [get_data_id(bam) for bam in bams]

    inputs = {
        'genome': genome,
        'bam': bams,
    }

    if color is not None:
        inputs['color'] = color

    if sense is not None:
        inputs['scale'] = scale

    if extension is not None:
        inputs['extension'] = extension

    if rpm is not None:
        inputs['rpm'] = rpm

    if yscale is not None:
        inputs['yscale'] = yscale

    if names is not None:
        inputs['names'] = names

    if plot is not None:
        inputs['plot'] = plot

    if title is not None:
        inputs['title'] = title

    if scale is not None:
        inputs['scale'] = scale

    if multi_page is not None:
        inputs['multi_page'] = multi_page

    if input_gff is not None:
        input_objects.append(input_gff)
        inputs['input_gff'] = get_data_id(input_gff)

    if input_region is not None:
        inputs['input_region'] = input_region

    if bed is not None:
        if isinstance(bed, list):
            input_objects.extend(bed)
            inputs['bed'] = [get_data_id(bed_obj) for bed_obj in bed]
        else:
            input_objects.append(bed)
            inputs['bed'] = [get_data_id(bed)]

    resolwe = get_resolwe(*input_objects)

    bamplot_obj = resolwe.get_or_run(slug='bamplot', input=inputs)

    if is_collection(resource):
        resource.add_data(bamplot_obj)
    elif is_relation(resource):
        resource.collection.add_data(bamplot_obj)

    return bamplot_obj
def cuffdiff(resource, annotation, genome=None, multi_read_correct=None, fdr=None,
             library_type=None, library_normalization=None, dispersion_method=None):
    """Run Cuffdiff_ for selected cuffquants.

    This method runs `Cuffdiff`_ process with ``annotation`` specified
    in arguments. Library type is by defalt fr-unstranded. Other parameters
    defaults: multi_read_correct=false, fdr=0.05, library_normalization=geometric,
    dispersion_method=pooled, threads=1. Parameter genome is optional.

    The way the function works depends on the resource. If it is run on a collection,
    it will perform cuffdiff on every 'compare' relation labeled 'case-control' in
    the selected collection. If it is run on a list of samples (not necesssarily in
    the same collection) it will run cuffdiff on all 'compare' relations labeled
    'case-control' containing all of the given samples but will discard those
    samples in a relation that are not in the list of samples.

    .. _Cuffdiff:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-cuffdiff

    :param annotation: annotation file
    :type annotation: `~resdk.resources.data.Data`
    :param genome: genome object to use for bias detection and
        correction algorithm
    :type genome: `~resdk.resources.data.Data`
    :param bool multi_read_correct: do initial estimation procedure to
        more accurately weight reads with multiple genome mappings
    :param fdr: the allowed false discovery rate
    :type fdr: decimal
    :param str library_type: options are: fr-unstranded, fr-firststrand,
        fr-secondstrand
    :param str library_normalization: options are: geometric, classic-fpkm,
        quartile
    :param str dispersion_method: options are: pooled, per-condition,
        blind, poisson

    """
    inputs = {'annotation': get_data_id(annotation)}

    input_objects = [annotation]

    if genome is not None:
        inputs['genome'] = genome.id
        input_objects.append(genome)

    if multi_read_correct is not None:
        inputs['multi_read_correct'] = multi_read_correct

    if fdr is not None:
        inputs['fdr'] = fdr

    if library_type is not None:
        inputs['library_type'] = library_type

    if library_normalization is not None:
        inputs['library_normalization'] = library_normalization

    if dispersion_method is not None:
        inputs['dispersion_method'] = dispersion_method

    samples = get_samples(resource)
    sample_ids = [sample.id for sample in samples]

    input_objects.extend(samples)
    resolwe = get_resolwe(*input_objects)

    collection_id = get_resource_collection(resource)

    relation_filter = {}
    if collection_id:
        relation_filter['collection'] = collection_id
    else:
        relation_filter['entity'] = sample_ids

    relations = resolwe.relation.filter(
        type='compare',
        **relation_filter
    )

    cuffdiff_objects = []
    for relation in relations:
        control = []
        case = []
        for partition in relation.partitions:
            sample = resolwe.sample.get(partition['entity'])
            label = partition['label']
            if sample.id not in sample_ids:
                continue

            if label == 'case':
                case.append(get_data_id(sample.get_cuffquant()))
            elif label == 'control':
                control.append(get_data_id(sample.get_cuffquant()))
            else:
                raise ValueError(
                    "Label different from 'case' or 'control' was found in the "
                    "following relation: {}".format(relation.id)
                )

        if not case or not control:
            continue

        inputs['case'] = case
        inputs['control'] = control

        cuffdiff_obj = resolwe.get_or_run(slug='cuffdiff', input=inputs)
        cuffdiff_objects.append(cuffdiff_obj)

        if is_collection(resource):
            resource.add_data(cuffdiff_obj)
        elif is_relation(resource):
            resource.collection.add_data(cuffdiff_obj)

    if not cuffdiff_objects:
        if not relations:
            raise ValueError("No relation containing all of the given samples was found")
        else:
            raise ValueError(
                "No suitable relation was found (given samples all have either 'case' label "
                "or 'control' label"
            )

    return cuffdiff_objects
Example #7
0
def bamliquidator(resource,
                  cell_type=None,
                  bin_size=None,
                  regions=None,
                  extension=None,
                  sense=None,
                  skip_plot=None,
                  black_list=None,
                  threads=None):
    """Run ``bamliquidator`` on the resource.

    This method runs `bamliquidator`_ with bams, where three different
    analysis type options are possible: Bin mode, Region mode and BED
    mode.

    .. _bamliquidator:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-bamliquidator

    :param list resource: resource from which bam objects will be get
    :param str cell_type: the name of cell type will be given in counts
        tables
    :param int bin_size: number of base pairs in each bin. Default is
        100000.
    :param regions: gtf or bed annotation object used in region mode
    :type regions: `~resdk.resources.data.Data`
    :param int extension: Extends reads by number of bp. Default is 200.
    :param str sense: Mapping strand to gff file. Use '+' for forwaed,
        '-' for reverse and '.' for both. Defoult is both.
    :param bool skip_plot: True for skip plot.
    :param list str black_list: One or more chromosome patterns to skip
        during bin liquidation. Default is to skip any chromosomes that
        contain any of the following substrings `chrUn`, `_random`,
        `Zv9_` or `_hap`.
    :param int threads: Number of CPUs

    """
    if not xor(bin_size, regions):
        raise KeyError(
            'Exactly one of `bin_size` and `regions` parameters must be given.'
        )

    if regions and not is_data(regions):
        raise KeyError('`regions` parameter must be data object.')

    input_objects = []

    bams = [sample.get_bam() for sample in get_samples(resource)]
    input_objects.extend(bams)
    bams = [get_data_id(bam) for bam in bams]

    inputs = {
        'bam': bams,
    }

    if bin_size:
        inputs['analysis_type'] = 'bin'
        inputs['bin_size'] = bin_size
    else:  # regions
        if regions.process_type == 'data:annotation:gtf:':
            inputs['analysis_type'] = 'gtf'
        elif regions.process_type == 'data:bed:':
            inputs['analysis_type'] = 'bed'
        else:
            raise KeyError(
                '`regions` object must be of type `data:annotation:gtf:` or `data:bed:`'
            )

        input_objects.append(regions)
        inputs['regions_file_gtf'] = get_data_id(regions)

    if cell_type is not None:
        inputs['cell_type'] = cell_type

    if extension is not None:
        inputs['extension'] = extension

    if sense is not None:
        inputs['sense'] = sense

    if skip_plot is not None:
        inputs['skip_plot'] = skip_plot

    if black_list is not None:
        inputs['black_list'] = black_list

    if threads is not None:
        inputs['threads'] = threads

    resolwe = get_resolwe(*input_objects)

    bamliquidator_obj = resolwe.get_or_run(slug='bamliquidator', input=inputs)

    if is_collection(resource):
        resource.add_data(bamliquidator_obj)
    elif is_relation(resource):
        resource.collection.add_data(bamliquidator_obj)

    return bamliquidator_obj
def cuffnorm(resource, annotation, use_ercc=None):
    """Run Cuffnorm_ for selected cuffquats.

    This method runs `Cuffnorm`_ process on ``resource`` with
    ``annotation`` and ``use_ercc`` parameters specified in arguments.

    .. _Cuffnorm:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-upload-expression-cuffnorm

    :param resource: resource on which cuffnorm will be run
    :param annotation: annotation object used in cuffnorm
    :type annotation: `~resdk.resources.data.Data`
    :param bool use_ercc: use ERRCC spike-in controls for normalization

    """
    relation_filter = {}
    collection_id = get_resource_collection(resource)
    if collection_id:
        relation_filter['collection'] = collection_id

    samples = get_samples(resource)

    input_objects = [annotation]
    input_objects.extend(samples)
    resolwe = get_resolwe(*input_objects)

    cuffquants = [get_data_id(sample.get_cuffquant()) for sample in samples]

    replicates = []
    replicates_ids = {}
    for sample in samples:
        relations = resolwe.relation.filter(
            type='group',
            entity=[sample.id],
            **relation_filter
        )

        if len(relations) == 1:
            relation = relations[0]
        else:
            raise LookupError(
                "Cannot determine unique group relation with label `replicates` for the "
                "following sample: {}".format(sample.name)
            )

        if relation.id not in replicates_ids:
            replicates_ids[relation.id] = str(len(replicates_ids))
        replicates.append(replicates_ids[relation.id])

    inputs = {
        'cuffquant': cuffquants,
        'replicates': replicates,
        'annotation': get_data_id(annotation),
    }

    if use_ercc is not None:
        inputs['useERCC'] = use_ercc

    cuffnorm_obj = resolwe.get_or_run(slug='cuffnorm', input=inputs)

    if is_collection(resource):
        resource.add_data(cuffnorm_obj)
    elif is_relation(resource):
        resource.collection.add_data(cuffnorm_obj)

    return cuffnorm_obj
Example #9
0
def bamliquidator(resource, cell_type=None, bin_size=None, regions=None, extension=None,
                  sense=None, skip_plot=None, black_list=None, threads=None):
    """Run ``bamliquidator`` on the resource.

    This method runs `bamliquidator`_ with bams, where three different
    analysis type options are possible: Bin mode, Region mode and BED
    mode.

    .. _bamliquidator:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-bamliquidator

    :param list resource: resource from which bam objects will be get
    :param str cell_type: the name of cell type will be given in counts
        tables
    :param int bin_size: number of base pairs in each bin. Default is
        100000.
    :param regions: gtf or bed annotation object used in region mode
    :type regions: `~resdk.resources.data.Data`
    :param int extension: Extends reads by number of bp. Default is 200.
    :param str sense: Mapping strand to gff file. Use '+' for forwaed,
        '-' for reverse and '.' for both. Defoult is both.
    :param bool skip_plot: True for skip plot.
    :param list str black_list: One or more chromosome patterns to skip
        during bin liquidation. Default is to skip any chromosomes that
        contain any of the following substrings `chrUn`, `_random`,
        `Zv9_` or `_hap`.
    :param int threads: Number of CPUs

    """
    if not xor(bin_size, regions):
        raise KeyError('Exactly one of `bin_size` and `regions` parameters must be given.')

    if regions and not is_data(regions):
        raise KeyError('`regions` parameter must be data object.')

    input_objects = []

    bams = [sample.get_bam() for sample in get_samples(resource)]
    input_objects.extend(bams)
    bams = [get_data_id(bam) for bam in bams]

    inputs = {
        'bam': bams,
    }

    if bin_size:
        inputs['analysis_type'] = 'bin'
        inputs['bin_size'] = bin_size
    else:  # regions
        if regions.process_type == 'data:annotation:gtf:':
            inputs['analysis_type'] = 'gtf'
        elif regions.process_type == 'data:bed:':
            inputs['analysis_type'] = 'bed'
        else:
            raise KeyError(
                '`regions` object must be of type `data:annotation:gtf:` or `data:bed:`'
            )

        input_objects.append(regions)
        inputs['regions_file_gtf'] = get_data_id(regions)

    if cell_type is not None:
        inputs['cell_type'] = cell_type

    if extension is not None:
        inputs['extension'] = extension

    if sense is not None:
        inputs['sense'] = sense

    if skip_plot is not None:
        inputs['skip_plot'] = skip_plot

    if black_list is not None:
        inputs['black_list'] = black_list

    if threads is not None:
        inputs['threads'] = threads

    resolwe = get_resolwe(*input_objects)

    bamliquidator_obj = resolwe.get_or_run(slug='bamliquidator', input=inputs)

    if is_collection(resource):
        resource.add_data(bamliquidator_obj)
    elif is_relation(resource):
        resource.collection.add_data(bamliquidator_obj)

    return bamliquidator_obj
Example #10
0
def bamplot(resource, genome, input_gff=None, input_region=None, stretch_input=None, color=None,
            sense=None, extension=None, rpm=None, yscale=None, names=None, plot=None, title=None,
            scale=None, bed=None, multi_page=None):
    """Run ``bamplot`` on the resource.

    This method runs `bamplot`_ with bams, genome and gff or region
    specified in arguments.

    .. _bamplot:
        http://resolwe-bio.readthedocs.io/en/latest/catalog-definitions.html#process-bamplot

    :param list resource: resource from which bam objects will be get
    :param str genome: Genome used in the process (options are HG18,
        HG19, MM9 and MM10)
    :param input_gff: id of annotation file is given
    :type input_gff: int or `~resdk.resources.data.Data`
    :param str input_region: enter a genomic region
    :param int stretch_input: stretch the input regions to a minimum
        length
    :param str color: enter a colon separated list of colors
    :param str sense: map to forward, reverse or'both strand,
        default maps to ``both``
    :param int extension: extends reads by n bp, dfault value is 200bp
    :param bool rpm: normalizes density to reads per million (rpm),
        default is ``False``
    :param str yscale: choose either relative or uniform y axis scaling,
        default is ``relative scaling``
    :param str names: a comma separated list of names for your bams
    :param str plot: choose all lines on a single plot or multiple plots
    :param str title: title for the output plot(s), default will be the
        coordinate region
    :param str scale: a comma separated list of multiplicative scaling
        factors for your bams, default is ``None``
    :param list beds: subset of bed files to run process on, if empty
        processes for all bed files will be run
    :param bool multi_page: if flagged will create a new pdf for each
        region

    """
    input_objects = []

    if not input_gff and not input_region:
        raise KeyError('Please specify `input_gff` or `input_region.')
    if input_gff and input_region:
        raise KeyError('Please specify `input_gff` or `input_region.')

    valid_genomes = ['HG18', 'HG19', 'MM8', 'MM9', 'MM10', 'RN4', 'RN6']
    if genome not in valid_genomes:
        raise KeyError('Invalid `genome`, please use one of the following: '
                       '{}'. format(', '.join(valid_genomes)))

    bams = [sample.get_bam() for sample in get_samples(resource)]
    input_objects.extend(bams)
    bams = [get_data_id(bam) for bam in bams]

    inputs = {
        'genome': genome,
        'bam': bams,
    }

    if color is not None:
        inputs['color'] = color

    if sense is not None:
        inputs['scale'] = scale

    if extension is not None:
        inputs['extension'] = extension

    if rpm is not None:
        inputs['rpm'] = rpm

    if yscale is not None:
        inputs['yscale'] = yscale

    if names is not None:
        inputs['names'] = names

    if plot is not None:
        inputs['plot'] = plot

    if title is not None:
        inputs['title'] = title

    if scale is not None:
        inputs['scale'] = scale

    if multi_page is not None:
        inputs['multi_page'] = multi_page

    if input_gff is not None:
        input_objects.append(input_gff)
        inputs['input_gff'] = get_data_id(input_gff)

    if input_region is not None:
        inputs['input_region'] = input_region

    if bed is not None:
        if isinstance(bed, list):
            input_objects.extend(bed)
            inputs['bed'] = [get_data_id(bed_obj) for bed_obj in bed]
        else:
            input_objects.append(bed)
            inputs['bed'] = [get_data_id(bed)]

    resolwe = get_resolwe(*input_objects)

    bamplot_obj = resolwe.get_or_run(slug='bamplot', input=inputs)

    if is_collection(resource):
        resource.add_data(bamplot_obj)
    elif is_relation(resource):
        resource.collection.add_data(bamplot_obj)

    return bamplot_obj