def test_json_for_sample(self):
    """Verify the exact set of keys produced by _get_json_for_sample.

    Uses the first record returned by Sample.objects.first() as input and
    compares the keys of the result against the expected key set.
    """
    expected_keys = {
        'projectGuid', 'individualGuid', 'sampleGuid', 'createdDate', 'sampleType', 'sampleId', 'sampleStatus',
        'datasetFilePath', 'loadedDate', 'datasetType', 'elasticsearchIndex',
    }

    first_sample = Sample.objects.first()
    sample_json = _get_json_for_sample(first_sample)
    actual_keys = set(sample_json.keys())

    # Set equality: any missing or unexpected key fails the test.
    self.assertSetEqual(actual_keys, expected_keys)
Beispiel #2
0
    def test_json_for_sample(self):
        """Check that _get_json_for_sample yields exactly the expected keys.

        The input is whatever Sample.objects.first() returns; only the key
        names of the result are checked, not the values.
        """
        expected_keys = {
            'projectGuid', 'individualGuid', 'sampleGuid', 'createdDate', 'sampleType', 'sampleId', 'sampleStatus',
            'datasetFilePath', 'loadedDate', 'datasetType', 'elasticsearchIndex',
        }

        sample_json = _get_json_for_sample(Sample.objects.first())

        # Compare as sets so that both missing and extra keys are reported.
        self.assertSetEqual(set(sample_json.keys()), expected_keys)
Beispiel #3
0
def _retrieve_samples(cursor, project_guid, individuals_by_guid):
    """Load every sample of one project and index the results by sample guid.

    Args:
        cursor: connected database cursor used to run the raw SQL query; its
            execute(), description and fetchall() members are used.
        project_guid (string): guid of the project, bound as the query's only parameter.
        individuals_by_guid (dict): maps each individual guid to a dictionary with
            individual info. The entry of every individual that owns a sample must
            already contain a "sampleGuids" collection that supports add() (e.g. a
            set); the guid of each sample found is added to it in place.

    Returns:
        dict: maps each sample guid to the value built by _get_json_for_sample for
            that row, with its "individualGuid" entry set to the guid of the
            individual the sample belongs to.
    """

    # Raw SQL is used because the Django ORM has no good way to express this kind of query.
    query = """
        SELECT
          p.guid AS project_guid,
          i.guid AS individual_guid,
          s.guid AS sample_guid,
          s.created_date AS sample_created_date,
          s.sample_type AS sample_sample_type,
          s.dataset_type AS sample_dataset_type,
          s.sample_id AS sample_sample_id,
          s.elasticsearch_index AS sample_elasticsearch_index,
          s.dataset_file_path AS sample_dataset_file_path,
          s.sample_status AS sample_sample_status,
          s.loaded_date AS sample_loaded_date
        FROM seqr_sample AS s
          JOIN seqr_individual AS i ON s.individual_id=i.id
          JOIN seqr_family AS f ON i.family_id=f.id
          JOIN seqr_project AS p ON f.project_id=p.id
        WHERE p.guid=%s
    """.strip()

    cursor.execute(query, [project_guid])

    # The first item of each cursor.description entry is the column name (the SQL alias).
    column_names = [column[0] for column in cursor.description]

    samples_by_guid = {}
    for values in cursor.fetchall():
        row = dict(zip(column_names, values))

        guid = row['sample_guid']
        if guid in samples_by_guid:
            sample_json = samples_by_guid[guid]
        else:
            # Build the JSON only the first time a given sample guid is seen.
            sample_json = _get_json_for_sample(row)
            samples_by_guid[guid] = sample_json

        # Link the two directions: individual -> sample guids, sample -> individual guid.
        owner_guid = row['individual_guid']
        individuals_by_guid[owner_guid]['sampleGuids'].add(guid)
        sample_json['individualGuid'] = owner_guid

    return samples_by_guid