Example #1
0
def get_gene_names(feature_ids, source, species):
    """Return the names of the features matching the given feature IDs.

    A default ``resdk.Resolwe`` connection is opened and its ``feature``
    endpoint is filtered by ``feature_ids``, ``source`` and ``species``.
    The ``name`` of every returned feature is collected in the order in
    which the query yields the features.
    """
    query = {
        'feature_id__in': feature_ids,
        'source': source,
        'species': species,
    }

    names = []
    for hit in resdk.Resolwe().feature.filter(**query):
        names.append(hit.name)
    return names
Example #2
0
 def setUp(self):
     """Create a scratch cache directory, connect and build the tables.

     Stores the temporary cache directory, the server URL, the collection
     slug, the server connection, the fetched collection and the
     ``RNATables`` instance on ``self``.
     """
     server_url = "https://app.genialis.com"
     collection_slug = "resdk-test-collection-tables"
     credentials = {"username": "******", "password": "******"}

     self.cache_dir = tempfile.mkdtemp()
     self.test_server_url = server_url
     self.test_collection_slug = collection_slug

     self.res = resdk.Resolwe(url=server_url, **credentials)
     self.collection = self.res.collection.get(collection_slug)
     self.ct = RNATables(self.collection, cache_dir=self.cache_dir)
Example #3
0
    def setUp(self):
        """Connect to the server, upload test inputs and grant permissions.

        Stores the connection in ``self.res`` and the uploaded reads and
        genome in ``self.reads`` / ``self.genome`` before delegating to the
        parent ``setUp``.
        """
        client = resdk.Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.res = client

        # Input data used by the tests.
        self.reads = self.upload_reads(client)
        self.genome = self.upload_genome(client)

        # The tests need to be allowed to run this process.
        self.allow_run_process(client, 'alignment-hisat2')

        super(TestStart, self).setUp()
Example #4
0
 def setUp(self):
     """Upload the example FASTQ file and wait for the data object.

     Sets ``self.res`` (server connection), ``self.reads`` (absolute path
     of the fixture), ``self.basename``, the expected output file names
     ``self.fastq`` / ``self.fastqc_archive`` and ``self.data`` (the
     result of the ``upload-fastq-single`` run).
     """
     self.res = resdk.Resolwe(EMAIL, PASSW, URL)

     # Join the path components explicitly. Concatenating
     # ``dirname(__file__)`` with "./../files/..." would glue "." onto the
     # directory name and resolve to the wrong place whenever that
     # directory is "." or "..".
     self.reads = os.path.abspath(
         os.path.join(
             os.path.dirname(__file__), "..", "files", "example.fastq"))
     self.basename = os.path.basename(self.reads)
     self.fastq = self.basename + ".gz"
     self.fastqc_archive = self.basename.split('.')[0] + "_fastqc.zip"
     self.data = self.res.run(slug='upload-fastq-single',
                              input={'src': [self.reads]})
     # wait_for_update is defined elsewhere; presumably it polls the data
     # object every ``step`` seconds for at most ``maxtime`` -- confirm.
     wait_for_update(self.data, maxtime=20, step=2)
Example #5
0
def main():
    """Invoke when run directly as a program.

    Reads feature IDs (one per line) from ``args.feature_ids``, checks that
    the knowledge base returns features for them and that those features
    belong to exactly one species, maps the IDs to ``args.target_db`` when
    the species or the database differs, and passes the resulting IDs to the
    external ``processor`` command. The command's standard output is written
    to ``terms.json``.

    Errors and warnings are printed as ``proc.error`` / ``proc.warning`` JSON
    lines; every error terminates the program with exit status 1.
    """
    # ``sys.exit`` replaces the bare ``exit`` helper, which is injected by the
    # ``site`` module for interactive use and is not guaranteed to exist
    # (e.g. under ``python -S``). The import is local so that this function
    # does not depend on the file's import block.
    import sys

    args = parse_arguments()

    res = resdk.Resolwe()

    with open(args.feature_ids) as gene_file:
        genes = [gene.strip() for gene in gene_file]

    org_features = res.feature.filter(source=args.source_db, feature_id=genes)

    if len(org_features) == 0:
        print(
            '{"proc.error":"No genes were fetched from the knowledge base."}')
        sys.exit(1)

    species = set(feature.species for feature in org_features)

    if len(species) != 1:
        print('{"proc.error":"Input genes belong to multiple species."}')
        sys.exit(1)

    # Exactly one species is left at this point.
    species = species.pop()

    if args.species == species and args.source_db == args.target_db:
        # Nothing to translate: the input IDs are already target IDs.
        target_ids = genes
    else:
        features = res.mapping.filter(source_db=args.source_db,
                                      target_db=args.target_db,
                                      source_id=genes)

        if len(features) == 0:
            print('{"proc.error":"Failed to map features."}')
            sys.exit(1)

        target_ids = [str(feature.target_id) for feature in features]

        if len(genes) > len(target_ids):
            print('{"proc.warning":"Not all features could be mapped."}')

    with tempfile.NamedTemporaryFile() as input_genes:
        # The processor reads the space-separated IDs from a file, so the
        # temporary file must be flushed before the command is started.
        input_genes.write(' '.join(target_ids).encode("UTF-8"))
        input_genes.flush()
        process = Popen([
            'processor',
            str(args.pval),
            str(args.min_genes), args.obo, args.gaf, input_genes.name
        ],
                        stdout=PIPE,
                        stderr=DEVNULL)
        # stderr is sent to DEVNULL, so only stdout carries data.
        out, _ = process.communicate()

        with open('terms.json', 'w') as f:
            f.write(out.decode("UTF-8"))
Example #6
0
    def setUp(self):
        """Upload the example FASTQ file and create a test collection.

        Sets ``self.res`` (server connection), ``self.reads`` (absolute path
        of the fixture), ``self.data`` (result of the ``upload-fastq-single``
        run), ``self.contributor`` and ``self.collection``.
        """
        self.res = resdk.Resolwe(EMAIL, PASSW, URL)

        # Join the path components explicitly. Concatenating
        # ``dirname(__file__)`` with "./../files/..." would glue "." onto the
        # directory name and resolve to the wrong place whenever that
        # directory is "." or "..".
        self.reads = os.path.abspath(
            os.path.join(
                os.path.dirname(__file__), "..", "files", "example.fastq"))
        self.data = self.res.run(slug='upload-fastq-single',
                                 input={'src': [self.reads]})
        wait_for_update(self.data, maxtime=20, step=2)
        self.contributor = self.data.contributor

        # Create a collection on the server. Note that self.data is not
        # added to it here.
        json_data = self.res.api.collection.post(
            {u'name': 'testing_collection'})
        self.collection = Collection(model_data=json_data, resolwe=self.res)
Example #7
0
    def setUp(self):
        """Connect to the server, prepare input data and grant permissions.

        Uploads reads, genome and annotation, creates a genome index, allows
        the processes and descriptor schemas used by the tests, then
        delegates to the parent ``setUp``.
        """
        client = resdk.Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.res = client

        self.reads = self.upload_reads(client)
        self.genome = self.upload_genome(client)
        self.genome_index = self.create_genome_index(client, self.genome)
        self.annotation = self.upload_annotation(client)

        # Processes the tests must be allowed to run.
        runnable_processes = (
            'upload-fastq-single',
            'alignment-hisat2',
            'workflow-bbduk-star-htseq',
        )
        for process_slug in runnable_processes:
            self.allow_run_process(client, process_slug)

        # Descriptor schemas the tests must be allowed to use.
        for schema_slug in ('reads', 'sample'):
            self.allow_use_descriptor_schema(client, schema_slug)

        super().setUp()
Example #8
0
    def setUp(self):
        """Upload the example FASTQ file and fetch the sample created for it.

        Sets ``self.res``, ``self.reads`` (absolute path of the fixture),
        ``self.basename``, the expected output names ``self.fastq`` /
        ``self.fastqc_archive``, ``self.data`` and ``self.sample``.
        """
        self.res = resdk.Resolwe(EMAIL, PASSW, URL)

        # Join the path components explicitly. Concatenating
        # ``dirname(__file__)`` with "./../files/..." would glue "." onto the
        # directory name and resolve to the wrong place whenever that
        # directory is "." or "..".
        self.reads = os.path.abspath(
            os.path.join(
                os.path.dirname(__file__), "..", "files", "example.fastq"))
        self.basename = os.path.basename(self.reads)
        self.fastq = self.basename + ".gz"
        self.fastqc_archive = self.basename.split('.')[0] + "_fastqc.zip"
        self.data = self.res.run(slug='upload-fastq-single',
                                 input={'src': [self.reads]})
        wait_for_update(self.data, maxtime=20, step=2)

        # Make a sample
        self.sample = self.res.presample.filter(data=self.data.id)[0]
        self.sample.confirm_is_annotated()
        # Pull the same sample down again to get it as Sample with sample endpoint:
        self.sample = self.res.sample.filter(data=self.data.id)[0]
Example #9
0
    def test_tutorial_resources(self):
        """Verify existence of resources required for tutorial."""
        res = resdk.Resolwe(url='https://app.genialis.com')
        docs_test = BaseResdkDocsFunctionalTest

        # Each ``get`` call is the check itself: the test passes as long as
        # none of them raises.
        required_samples = (docs_test.sample_slug,)
        for slug in required_samples:
            res.sample.get(slug)

        required_data = (
            docs_test.reads_slug,
            docs_test.genome_slug,
            docs_test.annotation_slug,
            docs_test.genome_index_slug,
        )
        for slug in required_data:
            res.data.get(slug)
Example #10
0
    def setUp(self):
        """Resolve fixture paths and create a test collection.

        Sets ``self.res`` (server connection), the absolute fixture paths
        ``self.reads``, ``self.yaml_path`` and ``self.tool1_path``,
        ``self.collection`` and an initially empty ``self.result``.
        """
        self.res = resdk.Resolwe(EMAIL, PASSW, URL)

        # Join the path components explicitly. Concatenating
        # ``dirname(__file__)`` with "./../files/..." would glue "." onto the
        # directory name and resolve to the wrong place whenever that
        # directory is "." or "..".
        files_dir = os.path.join(os.path.dirname(__file__), "..", "files")
        self.reads = os.path.abspath(
            os.path.join(files_dir, "example.fastq"))
        self.yaml_path = os.path.abspath(
            os.path.join(files_dir, "custom_process.yaml"))
        self.tool1_path = os.path.abspath(
            os.path.join(files_dir, "sum.py"))

        # Make a collection:
        collection_json = self.res.api.collection.post(
            {u'name': 'test_collection'})
        self.collection = Collection(model_data=collection_json,
                                     resolwe=self.res)

        self.result = None
Example #11
0
    def validate_protein(self):
        """Only validate protein names if species is human or mouse.

        For every sample, a gene symbol is derived from the "Protein" column.
        Symbols that are empty or listed in ``PROTEIN`` are skipped. For the
        remaining human and mouse samples the symbol is looked up in the
        knowledge base (source "UCSC"); an empty result is reported through
        ``self.error``.
        """
        res = resdk.Resolwe(url="https://app.genialis.com")

        for sample_name in self.sample_names:
            species = self.get_element(column_name="Species",
                                       sample_name=sample_name)
            protein = self.get_element(column_name="Protein",
                                       sample_name=sample_name)
            gene_symbol = self.get_part_before_colon_hypen(protein)

            if not gene_symbol or gene_symbol in PROTEIN:
                continue

            # The human species name must be spelled out in full; a mangled
            # literal never matches and silently skips all human samples.
            if species not in ("Homo sapiens", "Mus musculus"):
                continue

            kb_gene = res.feature.filter(source="UCSC",
                                         feature_id=[gene_symbol])
            if not kb_gene:
                self.error(
                    "SAMPLE: {} - Gene symbol {} is either invalid or "
                    "Knowledge Base cannot be reached.".format(
                        sample_name, protein))
Example #12
0
def main():
    """Check the expected assets of every species and report the problems.

    Logs in to the server at ``IMAPS_URL`` and calls ``check_asset`` for each
    asset of each species, collecting problems in one list. Prints the
    collected errors and raises ``ValueError`` if there are any; otherwise
    prints a success message.
    """
    res = resdk.Resolwe(url=IMAPS_URL)
    res.login()

    errors = []
    # Asset tables, each keyed by species, in the order they are checked.
    asset_tables = (
        GENOME,
        ANNOTATION,
        SEGMENT,
        STAR_INDEX,
        TRNA_RRNA_SEQ,
        TRNA_RRNA_INDEX,
    )

    for species in SPECIES:
        for table in asset_tables:
            check_asset(res, table[species], errors)

    if not errors:
        print("All good, assets as expected.")
        return

    for err in errors:
        print(err)
    raise ValueError("See errors above.")
Example #13
0
def main():
    """Download selected output fields of all finished data in a collection.

    For every data object with status "OK" in the collection named
    ``args.collection`` and every requested type, the matching output field
    is downloaded into ``args.directory``. Before each download,
    ``rename_if_clashing`` is called for every file name about to be written.
    """
    args = parse_arguments()

    res = resdk.Resolwe(url=SERVER_URL)
    res.login()
    collection = res.collection.get(name=args.collection)

    types = parse_types(args.types)
    for data in collection.data:
        if data.status != "OK":
            continue

        for type_ in types:
            # type_ is a tuple of size 1 or 2:
            # (field_name) or (field_name, process_type)
            if len(type_) == 2 and not data.process.type.strip(":").endswith(
                    type_[1]):
                continue

            field_name = type_[0]
            if field_name not in data.output:
                continue

            # A field holds either one file entry or a list of them; treat
            # both the same way.
            output = data.output[field_name]
            entries = output if isinstance(output, list) else [output]
            for entry in entries:
                # If the file about to be downloaded clashes with a file
                # already in the download directory, the existing file is
                # renamed to a name that is not taken yet.
                original_name = os.path.basename(entry["file"])
                rename_if_clashing(original_name, args.directory)

            print("Downloading {} output of data {} ...".format(
                field_name, data.name))
            data.download(field_name=field_name, download_dir=args.directory)
Example #14
0
    def run(self, inputs, outputs):
        """Collapse the tcount file, add TPM values and gene symbols.

        Runs ``alleyoop collapse`` on the input tcount file, renames the
        collapsed result, normalizes it with ``compute_tpm``, adds a
        ``gene_symbol`` column looked up in the knowledge base and writes the
        table back as a tab-separated file that becomes ``outputs.tcount``.
        """
        tcount_path = inputs.slamdunk.tcount.path
        basename = os.path.basename(tcount_path)
        assert basename.endswith(".tsv")
        name = basename[:-len(".tsv")]

        collapse_args = ["-o", ".", "-t", self.requirements.resources.cores]
        collapse_cmd = Cmd["alleyoop"]["collapse"][collapse_args][tcount_path]
        return_code, _, _ = collapse_cmd & TEE(retcode=None)
        if return_code:
            self.error("Alleyoop collapse analysis failed.")

        collapsed_output = name + "_collapsed.txt"
        os.rename(name + "_collapsed.csv", collapsed_output)

        # normalize to TPM
        tcount_tpm = compute_tpm(collapsed_output)

        # Columns written to the output file, in this order.
        out_columns = [
            "gene_symbol",
            "length",
            "readsCPM",
            "readsTPM",
            "conversionRate",
            "Tcontent",
            "coverageOnTs",
            "conversionsOnTs",
            "readCount",
            "tcReadCount",
            "multimapCount",
        ]

        # Map gene symbols to feature IDs, querying the knowledge base in
        # chunks of at most ``chunk_size`` IDs.
        res = resdk.Resolwe()
        chunk_size = 1000
        input_features = tcount_tpm.index.tolist()
        feature_dict = {}
        for start in range(0, len(input_features), chunk_size):
            chunk = input_features[start:start + chunk_size]
            features = res.feature.filter(
                source=inputs.source,
                species=inputs.slamdunk.species,
                feature_id__in=chunk,
            )
            for feature in features:
                feature_dict[feature.feature_id] = feature.name

        tcount_tpm["gene_symbol"] = tcount_tpm.index.map(feature_dict)
        tcount_tpm.to_csv(collapsed_output, columns=out_columns, sep="\t")

        outputs.tcount = collapsed_output
        outputs.species = inputs.slamdunk.species
        outputs.build = inputs.slamdunk.build
Example #15
0
    def setUp(self):
        """Connect to the server and upload the reads used by the tests."""
        client = resdk.Resolwe(ADMIN_USERNAME, ADMIN_PASSWORD, URL)
        self.res = client

        self.reads = self.upload_reads(client)

        super(TestTutorialGet, self).setUp()
Example #16
0
import argparse
import csv
import os

import resdk
from resdk.resources import Collection


# Command-line interface: all four options are required.
parser = argparse.ArgumentParser(description='Upload raw data.')

parser.add_argument('-sample_sheet', type=str, help='Sample sheet', required=True)
parser.add_argument('-username', type=str, help='Username', required=True)
parser.add_argument('-password', type=str, help='Password', required=True)
parser.add_argument('-URL', type=str, help='URL', required=True)

args = parser.parse_args()

# Connect with the supplied credentials and enable resdk logging.
res = resdk.Resolwe(args.username, args.password, args.URL)
resdk.start_logging()

# Maps the first cell of each row (the sample name) to a dict of
# {column header: cell value} for that row.
samples = {}

# NOTE(review): passing a file opened in binary mode ('rb') to csv.reader is
# the Python 2 idiom; under Python 3 csv.reader expects a text-mode file
# (opened with newline=''). Confirm the target interpreter.
with open(args.sample_sheet, 'rb') as sample_sheet:
    # The sample sheet is tab-separated and its first line is the header.
    sample_reader = csv.reader(sample_sheet, delimiter='\t')
    header = next(sample_reader)
    for row in sample_reader:
        # Start with an empty string for every header column so that columns
        # missing from a short row still have a key.
        samples[row[0]] = {col: '' for col in header}
        for i, column in enumerate(row):
            if i == 0:
                continue  # skip sample name
            samples[row[0]] [header[i]] = column

def main():
    """Invoke when run directly as a program.

    Parses the main expression file, looks up a gene symbol for every feature
    ID in the knowledge base, optionally merges additional normalized
    expression files, and writes the resulting table both as a gzipped
    tab-separated file (``<output_name>.txt.gz``) and as JSON
    (``<output_name>.json``).

    Inconsistent command-line arguments are reported through ``error`` and
    terminate the program with exit status 1. Feature IDs without a gene
    symbol only produce a warning.
    """
    args = parse_arguments()

    if args.norm_expressions and args.norm_expressions_type:
        if len(args.norm_expressions) != len(args.norm_expressions_type):
            print(
                error(
                    'The number of additional expression files must match the number of specified '
                    'expressions types.'))
            sys.exit(1)

    if args.norm_expressions_type:
        exp_types = [args.expressions_type] + args.norm_expressions_type
        if len(exp_types) != len(set(exp_types)):
            print(
                error(
                    'The union of the main expression type ({}) and additional normalized expression types {} '
                    'does not contain unique items.'.format(
                        args.expressions_type, args.norm_expressions_type)))
            sys.exit(1)

    res = resdk.Resolwe()

    feature_dict = {}
    df = parse_expression_file(args.expressions, args.expressions_type)

    # Get a list of feature IDs
    input_features = df['FEATURE_ID'].tolist()

    # Split feature IDs into chunks of at most CHUNK_SIZE elements
    features_sublists = [
        input_features[i:i + CHUNK_SIZE]
        for i in range(0, len(input_features), CHUNK_SIZE)
    ]

    # Fetch features from KB and add them to {feature_id: feature_name} mapping dict
    for fsublist in features_sublists:
        features = res.feature.filter(source=args.source_db,
                                      species=args.species,
                                      feature_id=fsublist)
        feature_dict.update({f.feature_id: f.name for f in features})

    # Map gene symbols to feature IDs
    df['GENE_SYMBOL'] = df['FEATURE_ID'].map(feature_dict)

    # Check if all of the input feature IDs could be mapped to the gene
    # symbols. Count the unmapped IDs themselves: counting every missing
    # value in the data frame would also include missing expression values.
    unmapped = [f_id for f_id in input_features if f_id not in feature_dict]
    if unmapped:
        print(
            warning(
                '{} feature(s) could not be mapped to the associated feature symbols.'
                .format(len(unmapped))))

    # Merge additional expression files with the original data frame
    if args.norm_expressions and args.norm_expressions_type:
        for exp_file, exp_type in zip(args.norm_expressions,
                                      args.norm_expressions_type):
            exp_df = parse_expression_file(exp_file, exp_type)
            df = df.merge(exp_df, on='FEATURE_ID')

    # Reorder the columns in dataframe
    columns = ['FEATURE_ID', 'GENE_SYMBOL', args.expressions_type]
    if args.norm_expressions_type:
        columns = columns + args.norm_expressions_type

    # Replace NaN values with empty string. The result is assigned instead of
    # filled in place, because the column selection is a derived frame.
    df = df[columns].fillna('')

    # Write to file
    df.to_csv(args.output_name + '.txt.gz',
              header=True,
              index=False,
              sep='\t',
              compression='gzip')

    # Write to JSON
    df_dict = df.set_index('FEATURE_ID').to_dict(orient='index')
    with open(args.output_name + '.json', 'w') as f:
        json.dump({'genes': df_dict}, f, allow_nan=False)
Example #18
0
 def setUp(self):
     """Connect to the server and start with an empty ``self.remove`` list."""
     connection = resdk.Resolwe(EMAIL, PASSW, URL)
     self.res = connection
     self.remove = list()
Example #19
0
def main():
    """Invoke when run directly as a program.

    Reads feature IDs (one per line) from ``args.feature_ids``, checks that
    the knowledge base returns features for them, maps them from
    ``args.source_db`` to ``args.target_db`` when the two differ, and passes
    the resulting IDs to the external ``processor`` command. The command's
    standard output is written to ``terms.json``.

    Problems are reported through ``error`` / ``warning``; every error
    terminates the program with exit status 1.
    """
    # ``sys.exit`` replaces the bare ``exit`` helper, which is injected by the
    # ``site`` module for interactive use and is not guaranteed to exist
    # (e.g. under ``python -S``). The import is local so that this function
    # does not depend on the file's import block.
    import sys

    args = parse_arguments()

    res = resdk.Resolwe()

    with open(args.feature_ids) as gene_file:
        genes = [gene.strip() for gene in gene_file]

    org_features = res.feature.filter(source=args.source_db,
                                      species=args.species,
                                      feature_id=genes)

    if len(org_features) == 0:
        print(error("No genes were fetched from the knowledge base."))
        sys.exit(1)

    if args.source_db == args.target_db:
        # Nothing to translate: the input IDs are already target IDs.
        target_ids = genes
    else:
        mapping_res = res.mapping.filter(
            source_db=args.source_db,
            source_species=args.species,
            target_db=args.target_db,
            target_species=args.species,
            source_id=genes,
        )

        if len(mapping_res) == 0:
            print(error("Failed to map features."))
            sys.exit(1)

        # Build the set once so that each membership test is a constant-time
        # lookup instead of a scan over the whole gene list.
        requested = set(genes)
        mappings = {}
        for m in mapping_res:
            if m.source_id not in requested:
                continue
            if m.source_id in mappings:
                # Keep the first mapping seen for a source ID.
                print(
                    warning(
                        "Mapping {} returned multiple times.".format(m)))
                continue
            mappings[m.source_id] = m.target_id

        if len(genes) > len(mappings):
            print(warning("Not all features could be mapped."))

        target_ids = mappings.values()

    with tempfile.NamedTemporaryFile() as input_genes:
        # The processor reads the space-separated IDs from a file, so the
        # temporary file must be flushed before the command is started.
        input_genes.write(' '.join(target_ids).encode("UTF-8"))
        input_genes.flush()
        process = Popen([
            'processor',
            str(args.pval),
            str(args.min_genes), args.obo, args.gaf, input_genes.name
        ],
                        stdout=PIPE,
                        stderr=DEVNULL)
        # stderr is sent to DEVNULL, so only stdout carries data.
        out, _ = process.communicate()

        with open('terms.json', 'w') as f:
            f.write(out.decode("UTF-8"))
import resdk

# Create a Resolwe object to interact with the server
# NOTE(review): the username and password are the literals 'admin'/'admin',
# presumably placeholders for a test server -- confirm before reusing.
res = resdk.Resolwe('admin', 'admin', 'https://torta.bcm.genialis.com')

# Print command details to stdout
resdk.start_logging()

# Get sample meta-data from the server
# (the argument identifies the sample; presumably it is the sample's slug)
sample = res.sample.get('human-example-chr22')

# Download files associated with the sample
sample.download()
Example #21
0
"""Code for ``tutorial-get.rst`` file."""
import resdk

# Create a Resolwe object to interact with the server and login
res = resdk.Resolwe(url='https://app.genialis.com')
res.login()

# Enable verbose logging to standard output
resdk.start_logging()

# Query the data and sample endpoints without any filter
res.data.all()
res.sample.all()

# Get all Collection objects with "SDK" in their name
res.collection.filter(name='SDK')

# Get all Processes with category "Align"
res.process.filter(category='Align')

# Filter by using several fields: status, a creation-date window, an
# ordering (the leading "-" presumably means descending) and a result limit
res.data.filter(
    status='OK',
    created__gt='2018-10-01',
    created__lt='2025-11-01',
    ordering='-modified',
    limit=3,
)

# Get object by slug
res.sample.get('resdk-example')
import resdk

# Create a Resolwe object to interact with the server
# ('<USERNAME>' and '<PASSWORD>' are placeholders to be replaced with real
# credentials)
res = resdk.Resolwe('<USERNAME>', '<PASSWORD>', 'https://app.genialis.com')

# Enable verbose logging to standard output
resdk.start_logging()

# Get sample meta-data from the server
# (the argument identifies the sample; presumably it is the sample's slug)
sample = res.sample.get('mouse-example-chr19')

# Download files associated with the sample
sample.download()