コード例 #1
0
    def create_mapped_reads_count(self, parent=None, name=None, url=None,
                                  reference_genome=None, metainfo=None):
        """
        Import a local or remote mapped reads count file as a Mapped Reads Count file.

        Every optional argument that is supplied (truthy) is written into the
        metainfo object before the loader is invoked. A metainfo object passed
        by the caller is updated in place.

        :param parent: accession of parent folder leave empty for ``Imported files``
        :type parent: str
        :param name: name of the file
        :type name: str
        :param url: URL of a file
        :type url: str
        :param reference_genome: reference genome accession
        :type reference_genome: str
        :param metainfo: metainfo object
        :type metainfo: BioMetainfo
        :return: file accession
        :rtype: str
        """
        if not metainfo:
            metainfo = BioMetainfo()
        if name:
            metainfo.add_string(BioMetainfo.NAME, name)
        if reference_genome:
            metainfo.add_file_reference(BioMetainfo.REFERENCE_GENOME,
                                        reference_genome)
        if url:
            metainfo.add_external_link(BioMetainfo.DATA_LINK, url)
        loader_id = 'genestack/mappedReadsCountLoader'
        loader_method = 'importFile'
        return self.__invoke_loader(loader_id, loader_method, parent, metainfo)
コード例 #2
0
    def create_microarray_assay(self, parent, name=None, urls=None,
                                method=None, organism=None, metainfo=None):
        """
        Add a Genestack Microarray Assay to an Experiment folder.

        If ``parent`` is not an Experiment, an exception will be raised.
        ``name`` and ``urls`` are required fields; each may be given either as
        an argument or through the metainfo object. A metainfo object passed by
        the caller is updated in place.

        :param parent: accession of parent experiment
        :type parent: str
        :param name: name of the file
        :type name: str
        :param urls: list of urls
        :type urls: list
        :param method: method
        :type method: str
        :param organism: organism
        :type organism: str
        :param metainfo: metainfo object
        :type metainfo: BioMetainfo
        :return: file accession
        :rtype: str
        """
        if not metainfo:
            metainfo = BioMetainfo()
        if name:
            metainfo.add_string(BioMetainfo.NAME, name)
        if organism:
            metainfo.add_organism(BioMetainfo.ORGANISM, organism)
        if method:
            metainfo.add_string(BioMetainfo.METHOD, method)
        # One reads link is registered per entry; a missing or empty list adds none.
        for reads_url in urls or ():
            metainfo.add_external_link(BioMetainfo.READS_LINK, reads_url)
        loader_id = 'genestack/experimentLoader'
        loader_method = 'addMicroarrayAssay'
        return self.__invoke_loader(loader_id, loader_method, parent, metainfo)
コード例 #3
0
    def create_report_file(self,
                           parent=None,
                           name=None,
                           urls=None,
                           metainfo=None):
        """
        Create a Genestack Report File from a local or remote data file.
        ``name`` and ``urls`` are required fields.
        They can be specified through the arguments or via a metainfo object.

        ``urls`` may be a single URL/path string or a list of them. A single
        string is treated as one link rather than being iterated character by
        character. A metainfo object passed by the caller is updated in place.

        :param parent: accession of parent folder leave empty for ``Imported files``
        :type parent: str
        :param name: name of the file
        :type name: str
        :param urls: URL or list of URLs of local file paths
        :type urls: list or str
        :param metainfo: metainfo object
        :type metainfo: BioMetainfo
        :return: file accession
        :rtype: str
        """
        metainfo = metainfo or BioMetainfo()
        if name:
            metainfo.add_string(BioMetainfo.NAME, name)
        if urls:
            try:
                string_types = basestring  # Python 2: covers both str and unicode
            except NameError:
                string_types = str  # Python 3
            # The documented contract accepts a bare string; iterating it
            # directly would register one link per character.
            if isinstance(urls, string_types):
                urls = [urls]
            for url in urls:
                metainfo.add_external_link(BioMetainfo.DATA_LINK, url)
        return self.__invoke_loader('genestack/reportLoader', 'importFile',
                                    parent, metainfo)
コード例 #4
0
    def create_dbnsfp(self, parent=None, url=None, name=None, organism=None,
                      metainfo=None):
        """
        Create a Genestack Variation Database file for dbNSFP.

        ``name`` and ``url`` are required fields; each may be given either as
        an argument or through the metainfo object. The database id ``dbNSFP``
        is always written to the metainfo. A metainfo object passed by the
        caller is updated in place.

        :param parent: accession of parent folder leave empty for ``Imported files``
        :type parent: str
        :param url: URL or local path
        :type url: str
        :param name: name of the file
        :type name: str
        :param organism: organism
        :type organism: str
        :param metainfo: metainfo object
        :type metainfo: BioMetainfo
        :return: file accession
        :rtype: str
        """
        if not metainfo:
            metainfo = BioMetainfo()
        # Recorded unconditionally, before any of the optional fields.
        metainfo.add_string(BioMetainfo.DATABASE_ID, 'dbNSFP')
        if name:
            metainfo.add_string(BioMetainfo.NAME, name)
        if organism:
            metainfo.add_organism(BioMetainfo.ORGANISM, organism)
        if url:
            metainfo.add_external_link(BioMetainfo.DATA_LINK, url)
        loader_id = 'genestack/variationDatabaseLoader'
        loader_method = 'addDbNSFP'
        return self.__invoke_loader(loader_id, loader_method, parent, metainfo)
コード例 #5
0
    def create_experiment(self, parent=None, name=None, description=None,
                          metainfo=None):
        """
        Create a Genestack Experiment.

        ``name`` is required; it may be given either as an argument or through
        the metainfo object. A metainfo object passed by the caller is updated
        in place.

        :param parent: accession of parent folder. Leave empty for ``Imported files``
        :type parent: str
        :param name: name of the file
        :type name: str
        :param description: experiment description
        :type description: str
        :param metainfo: metainfo object
        :type metainfo: BioMetainfo
        :return: file accession
        :rtype: str
        """
        if not metainfo:
            metainfo = BioMetainfo()
        if name:
            metainfo.add_string(BioMetainfo.NAME, name)
        if description:
            metainfo.add_string(BioMetainfo.DESCRIPTION, description)
        loader_id = 'genestack/experimentLoader'
        loader_method = 'addExperiment'
        return self.__invoke_loader(loader_id, loader_method, parent, metainfo)
コード例 #6
0
    def create_wig(self, parent=None, name=None, reference_genome=None,
                   url=None, metainfo=None):
        """
        Create a Genestack Wiggle Track from a local or remote WIG file.

        ``name`` and ``url`` are required fields; each may be given either as
        an argument or through the metainfo object. A metainfo object passed by
        the caller is updated in place.

        :param parent: accession of parent folder. Leave empty for ``Imported files``
        :type parent: str
        :param name: name of the file
        :type name: str
        :param reference_genome: accession of reference genome
        :type reference_genome: str
        :param url: URL or local path to file
        :type url: str
        :param metainfo: metainfo object
        :type metainfo: BioMetainfo
        :return: file accession
        :rtype: str
        """
        if not metainfo:
            metainfo = BioMetainfo()
        if name:
            metainfo.add_string(BioMetainfo.NAME, name)
        if reference_genome:
            metainfo.add_file_reference(BioMetainfo.REFERENCE_GENOME,
                                        reference_genome)
        if url:
            metainfo.add_external_link(BioMetainfo.DATA_LINK, url)
        loader_id = 'genestack/wigLoader'
        loader_method = 'importFile'
        return self.__invoke_loader(loader_id, loader_method, parent, metainfo)
コード例 #7
0
    def create_reference_genome(self,
                                parent=None,
                                name=None,
                                description='',
                                sequence_urls=None,
                                annotation_url=None,
                                organism=None,
                                assembly=None,
                                release=None,
                                strain=None,
                                metainfo=None):
        """
        Create a Genestack Reference Genome from a collection of local or remote FASTA sequence files, and a GTF or GFF
        annotation file.

        Every optional argument that is supplied (truthy) is written into the
        metainfo object; the description is always written (as an empty string
        when none is given). A metainfo object passed by the caller is updated
        in place.

        :param parent: accession of parent folder leave empty for ``Imported files``
        :type parent: str
        :param name: name of the file
        :type name: str
        :param description: description of the reference genome
        :type description: str
        :param sequence_urls: list of urls or local paths to sequence files;
            ``None`` (the default) or an empty list adds no sequence links
        :type sequence_urls: list
        :param annotation_url: url to annotation file
        :type annotation_url: str
        :param organism: organism
        :type organism: str
        :param assembly: assembly
        :type assembly: str
        :param release: release
        :type release: str
        :param strain: strain
        :type strain: str
        :param metainfo: metainfo object
        :type metainfo: BioMetainfo
        :return: value returned by the loader invocation (presumably the file
            accession, as for the other importers -- confirm)
        """
        metainfo = metainfo or BioMetainfo()
        if name:
            metainfo.add_string(BioMetainfo.NAME, name)
        if organism:
            metainfo.add_organism(BioMetainfo.ORGANISM, organism)
        if strain:
            metainfo.add_string(BioMetainfo.STRAIN, strain)
        if assembly:
            metainfo.add_string(BioMetainfo.REFERENCE_GENOME_ASSEMBLY,
                                assembly)
        if release:
            metainfo.add_string(BioMetainfo.REFERENCE_GENOME_RELEASE,
                                release)
        if annotation_url:
            metainfo.add_external_link(
                ANNOTATION_KEY, annotation_url, text='Annotations data link')
        metainfo.add_string(metainfo.DESCRIPTION, description or '')
        # ``sequence_urls`` defaults to None; iterating it directly would raise
        # TypeError, so a missing value is treated as "no sequence links".
        for seq_link in sequence_urls or ():
            metainfo.add_external_link(SEQUENCE_KEY,
                                       seq_link,
                                       text='Sequence data link')
        return self.__invoke_loader('genestack/referenceGenomeLoader',
                                    'importFile', parent, metainfo)
コード例 #8
0
    def create_genome_annotation(self, parent=None, url=None, name=None,
                                 organism=None, reference_genome=None,
                                 strain=None, metainfo=None):
        """
        Create a Genestack Genome Annotation file from a local or remote file.

        ``name`` and ``url`` are required fields; each may be given either as
        an argument or through the metainfo object. A metainfo object passed by
        the caller is updated in place.

        :param parent: accession of parent folder. Leave empty for ``Imported files``
        :type parent: str
        :param url: URL or local path
        :type url: str
        :param name: name of the file
        :type name: str
        :param organism: organism
        :type organism: str
        :param reference_genome: reference genome accession
        :type reference_genome: str
        :param strain: strain
        :type strain: str
        :param metainfo: metainfo object
        :type metainfo: BioMetainfo
        :return: file accession
        :rtype: str
        """
        if not metainfo:
            metainfo = BioMetainfo()
        if name:
            metainfo.add_string(BioMetainfo.NAME, name)
        if organism:
            metainfo.add_organism(BioMetainfo.ORGANISM, organism)
        if strain:
            metainfo.add_string(BioMetainfo.STRAIN, strain)
        if reference_genome:
            metainfo.add_file_reference(BioMetainfo.REFERENCE_GENOME,
                                        reference_genome)
        if url:
            metainfo.add_external_link(BioMetainfo.DATA_LINK, url)
        loader_id = 'genestack/genome-annotation-loader'
        loader_method = 'addGOAnnotationFile'
        return self.__invoke_loader(loader_id, loader_method, parent, metainfo)
コード例 #9
0
    def create_unaligned_read(self, parent=None, name=None, urls=None,
                              method=None, organism=None, metainfo=None):
        """
        Create a Genestack Unaligned Reads file from one or several local or remote files.

        Most common file formats encoding sequencing reads with quality scores
        are accepted (FASTQ 33/64, SRA, FASTA+QUAL, SFF, FAST5).
        ``name`` and ``urls`` are required fields; each may be given either as
        an argument or through the metainfo object. A metainfo object passed by
        the caller is updated in place.

        :param parent: accession of parent folder leave empty for ``Imported files``
        :type parent: str
        :param name: name of the file
        :type name: str
        :param urls: list of urls
        :type urls: list
        :param method: method
        :type method: str
        :param organism: organism
        :type organism: str
        :param metainfo: metainfo object
        :type metainfo: BioMetainfo
        :return: file accession
        :rtype: str
        """
        if not metainfo:
            metainfo = BioMetainfo()
        if name:
            metainfo.add_string(BioMetainfo.NAME, name)
        if organism:
            metainfo.add_organism(BioMetainfo.ORGANISM, organism)
        if method:
            metainfo.add_string(BioMetainfo.METHOD, method)
        # One reads link is registered per entry; a missing or empty list adds none.
        for reads_url in urls or ():
            metainfo.add_external_link(BioMetainfo.READS_LINK, reads_url)
        loader_id = 'genestack/unalignedReadsLoader'
        loader_method = 'importFile'
        return self.__invoke_loader(loader_id, loader_method, parent, metainfo)
コード例 #10
0
    def create_bam(self, parent=None, name=None, url=None, metainfo=None,
                   organism=None, strain=None, reference_genome=None):
        """
        Create a Genestack Aligned Reads file from a local or remote BAM file.

        Every optional argument that is supplied (truthy) is written into the
        metainfo object before the loader is invoked. A metainfo object passed
        by the caller is updated in place.

        :param parent: accession of parent folder leave empty for ``Imported files``
        :type parent: str
        :param name: name of the file
        :type name: str
        :param url: URL of a BAM file; the index will be created at initialization
        :type url: str
        :param metainfo: metainfo object
        :type metainfo: BioMetainfo
        :param organism: organism
        :type organism: str
        :param strain: strain
        :type strain: str
        :param reference_genome: reference genome accession
        :type reference_genome: str
        :return: file accession
        :rtype: str
        """
        if not metainfo:
            metainfo = BioMetainfo()
        if name:
            metainfo.add_string(BioMetainfo.NAME, name)
        if organism:
            metainfo.add_organism(BioMetainfo.ORGANISM, organism)
        if strain:
            metainfo.add_string(BioMetainfo.STRAIN, strain)
        if reference_genome:
            metainfo.add_file_reference(BioMetainfo.REFERENCE_GENOME,
                                        reference_genome)
        if url:
            metainfo.add_external_link(BioMetainfo.BAM_FILE_LINK, url)
        loader_id = 'genestack/alignedReadsLoader'
        loader_method = 'importFile'
        return self.__invoke_loader(loader_id, loader_method, parent, metainfo)
コード例 #11
0
# Parse the CSV file: the header row supplies the column names, and every
# following row is turned into one BioMetainfo object.
with open(csv_input, 'r') as the_file:
    reader = csv.DictReader(the_file, delimiter=",")
    field_names = reader.fieldnames

    # Check that every mandatory key is present as a CSV column; abort on the
    # first one that is missing.
    for mandatory_key in MANDATORY_KEYS:
        if mandatory_key not in field_names:
            raise GenestackException(
                "The key '%s' must be supplied in the CSV file" %
                mandatory_key)

    for file_data in reader:

        # For each row, prepare a fresh BioMetainfo object.
        metainfo = BioMetainfo()
        for key in field_names:
            # 'link' and 'organism' are treated separately, as they are added
            # to the metainfo using specific methods. The comparison is against
            # the exact (case-sensitive) column name.
            if key == "link":
                url = file_data[key]
                metainfo.add_external_link(key=BioMetainfo.READS_LINK,
                                           text="link",
                                           url=url,
                                           fmt=file_format)
            elif key == "organism":
                metainfo.add_organism(BioMetainfo.ORGANISM, file_data[key])
            # All the other columns are added as strings; the lower-cased
            # column name is looked up in SPECIAL_KEYS and the column name
            # itself is used as the metainfo key when there is no entry.
            else:
                metainfo_key = SPECIAL_KEYS.get(key.lower(), key)
                metainfo.add_string(metainfo_key, file_data[key])
コード例 #12
0
        reader = csv.DictReader(the_file, delimiter=",")
        field_names = reader.fieldnames

        # Fail early when the column used to match CSV rows to remote files is absent.
        if args.local_key not in field_names:
            raise GenestackException("Error: the local key %s is not present in the supplied CSV file" % args.local_key)

        for file_data in reader:
            # Find the remote file corresponding to this row.
            # NOTE(review): the lookup uses ``local_key`` while the checks use
            # ``args.local_key`` -- presumably the same value bound earlier in
            # the script; confirm.
            local_identifier = file_data[local_key]
            remote_file = identifier_map.get(local_identifier)
            if not remote_file:
                # Single-argument parenthesized form: prints the same text under
                # Python 2 and also parses under Python 3.
                print("Warning: no match found for file name '%s'" % local_identifier)
                continue

            # Prepare a BioMetainfo object for the matched file.
            metainfo = BioMetainfo()
            for key in field_names:
                value = file_data[key]
                # Empty cells carry no metainfo.
                if value is None or value == "":
                    continue
                # The matching column itself is not stored as metainfo.
                if key == args.local_key:
                    continue
                if key == "organism":
                    metainfo.add_organism(BioMetainfo.ORGANISM, value)
                    continue
                metainfo_key = SPECIAL_KEYS.get(key.lower(), key)
                # Parse once and reuse the result; ``None`` means the cell is
                # not a boolean and is stored as a plain string.
                boolean_value = parse_as_boolean(value)
                if boolean_value is not None:
                    metainfo.add_boolean(metainfo_key, boolean_value)
                else:
                    metainfo.add_string(metainfo_key, value)
コード例 #13
0

# parse the CSV file
with open(csv_input, 'r') as the_file:
    reader = csv.DictReader(the_file, delimiter=",")
    field_names = reader.fieldnames

    # Refuse to continue when a required column is absent; the first missing
    # key, in MANDATORY_KEYS order, is the one reported.
    missing_keys = [required for required in MANDATORY_KEYS if required not in field_names]
    if missing_keys:
        raise GenestackException("The key '%s' must be supplied in the CSV file" % missing_keys[0])

    for file_data in reader:
        # Each CSV row becomes one BioMetainfo object.
        metainfo = BioMetainfo()
        for key in field_names:
            value = file_data[key]
            if key == "organism":
                # Organism values go through the dedicated metainfo method.
                metainfo.add_organism(BioMetainfo.ORGANISM, value)
            elif key == "link":
                # The link column is stored as the reads link.
                metainfo.add_external_link(key=BioMetainfo.READS_LINK, text="link", url=value, fmt=file_format)
            else:
                # Every other column is stored as a string, under its
                # SPECIAL_KEYS entry when one exists for the lower-cased name.
                metainfo.add_string(SPECIAL_KEYS.get(key.lower(), key), value)

        # Create the sequencing assay on Genestack from the collected metainfo.
        created_file = importer.create_sequencing_assay(experiment, metainfo=metainfo)
コード例 #14
0
 def create_codon_table(self, parent=None, metainfo=None):
     """
     Invoke the codon table loader with the given parent and metainfo.

     :param parent: passed through to the loader unchanged
     :param metainfo: metainfo object; a new ``BioMetainfo`` is created when
         none (or a falsy one) is supplied
     :type metainfo: BioMetainfo
     :return: value returned by the loader invocation
     """
     if not metainfo:
         metainfo = BioMetainfo()
     loader_id = 'genestack/codonTableLoader'
     loader_method = 'addCodonTable'
     return self.__invoke_loader(loader_id, loader_method, parent, metainfo)
コード例 #15
0
        if args.local_key not in field_names:
            raise GenestackException(
                "Error: the local key %s is not present in the supplied CSV file"
                % args.local_key)

        for file_data in reader:
            # find the corresponding file
            local_identifier = file_data[local_key]
            remote_file = identifier_map.get(local_identifier)
            if not remote_file:
                print "Warning: no match found for file name '%s'" % local_identifier
                continue

            # prepare a BioMetainfo object
            metainfo = BioMetainfo()
            for key in field_names:
                # key parsing logic
                value = file_data[key]
                if value == "" or value is None:
                    continue
                if key == args.local_key:
                    continue
                if key == "organism":
                    metainfo.add_organism(BioMetainfo.ORGANISM, value)
                else:
                    metainfo_key = SPECIAL_KEYS.get(key.lower(), key)
                    if parse_as_boolean(value) is not None:
                        metainfo.add_boolean(metainfo_key,
                                             parse_as_boolean(value))
                    else: