Code example #1
import io
import logging
import os

import yaml

# TantalusApi and create_lane_fastq_metadata are provided by the surrounding
# project (the Tantalus API client and its fastq metadata helpers).


def create_fastq_metadata_yaml(library_id, storage_name, dry_run=False):
    """
    Create a metadata.yaml file for all fastq datasets for a library id.
    """
    tantalus_api = TantalusApi()

    storage = tantalus_api.get_storage(storage_name)
    client = tantalus_api.get_storage_client(storage_name)

    for dataset_info, metadata in create_lane_fastq_metadata(
            tantalus_api, library_id):
        metadata_filename = os.path.join(dataset_info['base_dir'],
                                         'metadata.yaml')
        metadata_filepath = tantalus_api.get_filepath(storage_name,
                                                      metadata_filename)

        metadata_io = io.BytesIO()
        metadata_io.write(
            yaml.dump(metadata, default_flow_style=False).encode())

        logging.info(f'writing metadata to file {metadata_filepath}')
        client.write_data(metadata_filename, metadata_io)

        logging.info(f'adding {metadata_filepath} to tantalus')

        if not dry_run:
            file_resource, file_instance = tantalus_api.add_file(
                storage_name, metadata_filepath, update=True)

            for dataset_id in dataset_info['dataset_ids']:
                dataset = tantalus_api.get('sequencedataset', id=dataset_id)

                new_file_resources = set(dataset['file_resources'])
                new_file_resources.add(file_resource['id'])

                tantalus_api.update('sequencedataset',
                                    id=dataset_id,
                                    file_resources=list(new_file_resources))
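A minimal usage sketch for the function above; the library id and storage name are hypothetical and would need to match records that already exist in Tantalus:

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    # Hypothetical library id and Tantalus storage name; with dry_run=True the
    # metadata.yaml files are written but not registered in Tantalus.
    create_fastq_metadata_yaml('A96213A', 'scrna_fastq', dry_run=True)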
Code example #2
import io
import os

import yaml

# TantalusApi and create_lane_fastq_metadata are provided by the surrounding
# project (the Tantalus API client and its fastq metadata helpers).


def add_fastq_metadata_yaml(dataset_id, storage_name, dry_run=False):
    """
    Create a metadata.yaml file for a dataset and add to tantalus.
    """
    tantalus_api = TantalusApi()

    storage = tantalus_api.get_storage(storage_name)
    client = tantalus_api.get_storage_client(storage_name)

    metadata, base_dir = create_lane_fastq_metadata(tantalus_api, dataset_id)

    metadata_filename = os.path.join(base_dir, 'metadata.yaml')
    metadata_filepath = tantalus_api.get_filepath(storage_name,
                                                  metadata_filename)

    metadata_io = io.BytesIO()
    metadata_io.write(yaml.dump(metadata, default_flow_style=False).encode())

    print(f'writing metadata to file {metadata_filepath}')
    client.write_data(metadata_filename, metadata_io)

    print(f'adding {metadata_filepath} to tantalus')

    if not dry_run:
        file_resource, file_instance = tantalus_api.add_file(storage_name,
                                                             metadata_filepath,
                                                             update=True)

        dataset = tantalus_api.get('sequencedataset', id=dataset_id)

        new_file_resources = set(dataset['file_resources'])
        new_file_resources.add(file_resource['id'])

        tantalus_api.update('sequencedataset',
                            id=dataset_id,
                            file_resources=list(new_file_resources))
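A similar sketch for the per-dataset variant above; the dataset id and storage name are hypothetical:

if __name__ == '__main__':
    # Hypothetical sequence dataset id and storage name; drop dry_run=True to
    # register the metadata.yaml file and attach it to the dataset.
    add_fastq_metadata_yaml(1234, 'scrna_fastq', dry_run=True)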
Code example #3
import logging
import socket

# TantalusApi and the GSC helper functions used below (connect_to_client,
# query_gsc, get_gsc_details, rename_bam_paths, transfer_gsc_bams, import_bam,
# valid_date) are provided by the surrounding project.


def main(**kwargs):
    """
    Queries the GSC for WGS bams. Transfers bams to the specified storage if
    necessary and uploads metadata to tantalus.

    Args:
        ids:                (list) internal IDs to query the GSC for
        storage:            (string) destination storage to transfer bams to
        id_type:            (string) type of ID specified (either sample or library) 
        skip_older_than:    (string) skip bams older than this date
        tag_name:           (string) tag name to associate the resulting sequence datasets
                            with when importing into tantalus
        update:             (flag) specifies whether metadata in tantalus is
                            to be updated or not
        skip_file_import:   (flag) import only lanes, libraries, and samples
                            into tantalus, skipping the bam files themselves
        query_only:         (flag) only query for the bam paths on the GSC 
    """
    # Check if this script is being run on the txshah host
    # If not, connect to an ssh client to access /projects/files
    if socket.gethostname() != "txshah":
        ssh_client = connect_to_client("10.9.208.161")
        sftp = ssh_client.open_sftp()
    else:
        sftp = None

    # Connect to the Tantalus API
    tantalus_api = TantalusApi()
    storage = tantalus_api.get_storage(kwargs["storage"])

    # Convert the date to the format we want
    skip_older_than = None
    if kwargs["skip_older_than"]:
        skip_older_than = valid_date(kwargs["skip_older_than"])

    # Check that an ID type was specified
    if not kwargs["id_type"]:
        raise Exception("Please specify an ID type (sample or library)")

    details = []
    for identifier in kwargs["ids"]:
        # Query the GSC to see if the ID exists
        infos = query_gsc(identifier, kwargs["id_type"])

        if not infos:
            logging.info("No results for {} {}. Skipping import".format(
                kwargs["id_type"], identifier))
            continue

        logging.info("{} {} exists on the GSC".format(
            kwargs["id_type"], identifier))

        # Get the data from GSC
        details = get_gsc_details(
            infos,
            skip_older_than=skip_older_than,
        )

        # Import and transfer each file
        for detail in details:
            # Rename the bams according to internal templates
            bam_paths = rename_bam_paths(detail, storage, sftp)

            # If the bam path does not exist at the source, skip
            # the transfer and import for this bam
            if not bam_paths["source_bam_path"]:
                continue

            # Skip import if we only wanted to query for paths
            if kwargs["query_only"]:
                continue

            if not kwargs["skip_file_import"]:
                # Transfer the bam to the specified storage
                transfer_gsc_bams(detail, bam_paths, storage, sftp)

                # Add the files to Tantalus
                logging.info("Importing {} to Tantalus".format(
                    bam_paths["tantalus_bam_path"]))

                dataset = import_bam(
                    storage_name=storage["name"],
                    bam_file_path=bam_paths["tantalus_bam_path"],
                    sample=detail["sample"],
                    library=detail["library"],
                    lane_infos=detail["lane_info"],
                    read_type=detail["read_type"],
                    tag_name=kwargs["tag_name"],
                    update=kwargs["update"])

                logging.info(
                    "Successfully added sequence dataset with ID {}".format(
                        dataset["id"]))
            else:
                logging.info("Importing library {} to tantalus".format(
                    detail["library"]["library_id"]))
                library_pk = tantalus_api.get_or_create(
                    "dna_library",
                    library_id=detail["library"]["library_id"],
                    library_type=detail["library"]["library_type"],
                    index_format=detail["library"]["index_format"])["id"]

                # Only add lanes, libraries, and samples to tantalus
                logging.info(
                    "Importing lanes for library {} to tantalus".format(
                        detail["library"]["library_id"]))
                for lane in detail["lane_info"]:
                    lane_object = tantalus_api.get_or_create(
                        "sequencing_lane",
                        flowcell_id=lane["flowcell_id"],
                        dna_library=library_pk,
                        read_type=lane["read_type"],
                        lane_number=str(lane["lane_number"]),
                        sequencing_centre="GSC",
                        sequencing_instrument=lane["sequencing_instrument"])
                    logging.info(
                        "Successfully created lane {} in tantalus".format(
                            lane_object["id"]))