Exemple #1
0
 def test_generate_sample_id_with_file_path(self):
     """Checks the pinned IDs for each (sample name, file path) pair."""
     pinned_ids = (
         ('Sample1', 'gs://bucket1/dir1/file1.vcf', 7715696391291253656),
         ('Sample2', 'gs://bucket1/dir1/file1.vcf', 5682150464643626236),
         ('Sample1', 'gs://bucket1/dir1/file2.vcf', 668336000922978678),
         ('Sample2', 'gs://bucket1/dir1/file2.vcf', 5498327443813165683),
     )
     # Cases run in the listed order; the first mismatch fails the test.
     for sample_name, file_path, expected_id in pinned_ids:
         actual_id = hashing_util.generate_sample_id(sample_name, file_path)
         self.assertEqual(actual_id, expected_id)
 def _lookup_encoded_sample_name(self, sample_name):
   """Returns the encoded ID for `sample_name`, caching the result.

   The ID is first looked up in `self._encoded_sample_names`. On a miss it is
   derived according to `self._sample_name_encoding`:
     - WITH_FILE_PATH: `hashing_util.generate_sample_id` of the sample name
       and `self._file_name`.
     - WITHOUT_FILE_PATH: `hashing_util.generate_sample_id` of the sample
       name alone.
     - NONE: the sample name itself, unchanged.
   The derived value is stored in `self._encoded_sample_names` before being
   returned.

   Args:
     sample_name: The sample name to encode.

   Returns:
     The cached or newly derived sample ID.

   Raises:
     ValueError: If `self._sample_name_encoding` is none of the encodings
       listed above.
   """
   sample_id = self._encoded_sample_names.get(sample_name)
   # Compare against None instead of relying on truthiness: an ID of 0 or an
   # empty sample name (NONE encoding) is a legitimate cached value and must
   # not be recomputed on every lookup.
   if sample_id is not None:
     return sample_id

   if self._sample_name_encoding == SampleNameEncoding.WITH_FILE_PATH:
     sample_id = hashing_util.generate_sample_id(sample_name,
                                                 self._file_name)
   elif self._sample_name_encoding == SampleNameEncoding.WITHOUT_FILE_PATH:
     sample_id = hashing_util.generate_sample_id(sample_name)
   elif self._sample_name_encoding == SampleNameEncoding.NONE:
     sample_id = sample_name
   else:
     raise ValueError('Unknown Sample Name Encoding supplied: {}'.format(
         self._sample_name_encoding))
   self._encoded_sample_names[sample_name] = sample_id
   return sample_id
  def process(self, vcf_header):
    # type: (vcf_header_io.VcfHeader, bool) -> Dict[str, Union[int, str]]
    """Yields one sample-info row for every sample in `vcf_header`.

    When `self._sample_name_encoding` is WITH_FILE_PATH, the sample name is
    first replaced by `hashing_util.create_composite_sample_name` of the name
    and `vcf_header.file_path`; the sample ID is then generated from that
    (possibly composite) name, which is also the name stored in the row.

    Args:
      vcf_header: Header object providing `samples` and `file_path`.

    Yields:
      A dict keyed by the `sample_info_table_schema_generator` column
      constants SAMPLE_ID, SAMPLE_NAME, FILE_PATH and INGESTION_DATETIME.
    """
    # NOTE(review): the type comment above lists an extra `bool` argument and
    # a plain Dict return, while this method takes only `vcf_header` and
    # yields dicts -- confirm and update.
    # One timestamp is taken up front and shared by all rows of this header.
    minute_stamp = self._get_now_to_minute()
    for raw_name in vcf_header.samples:
      if self._sample_name_encoding == SampleNameEncoding.WITH_FILE_PATH:
        sample_name = hashing_util.create_composite_sample_name(
            raw_name, vcf_header.file_path)
      else:
        sample_name = raw_name
      encoded_id = hashing_util.generate_sample_id(sample_name)

      yield {
          sample_info_table_schema_generator.SAMPLE_ID: encoded_id,
          sample_info_table_schema_generator.SAMPLE_NAME: sample_name,
          sample_info_table_schema_generator.FILE_PATH: vcf_header.file_path,
          sample_info_table_schema_generator.INGESTION_DATETIME: minute_stamp
      }
Exemple #4
0
def hash_name(sample_name, file_name=''):
    """Return `hashing_util.generate_sample_id(sample_name, file_name)`.

    `file_name` defaults to the empty string and is always forwarded
    positionally as the second argument.
    """
    sample_id = hashing_util.generate_sample_id(sample_name, file_name)
    return sample_id
Exemple #5
0
    def test_generate_sample_id_without_file_path(self):
        """Checks the pinned IDs produced from a sample name alone."""
        pinned_ids = (
            ('Sample1', 6365297890523177914),
            ('Sample2', 8341768597576477893),
        )
        # Cases run in the listed order; the first mismatch fails the test.
        for sample_name, expected_id in pinned_ids:
            actual_id = hashing_util.generate_sample_id(sample_name)
            self.assertEqual(actual_id, expected_id)