コード例 #1
0
 def test_precompute_gencode(self):
     """Check the size of the k=1 normalization for the GENCODE human set.

     Opens the unzipped FASTA file named by ``skr_config.GENCODE_HUMAN``
     under ``../cache/`` (relative to the current working directory), runs
     ``compute_normalization_and_frequency`` on it, and asserts that the
     returned mean has ``4**k`` entries.
     """
     kmer_size = 1
     fasta_name = get_unzipped_file_name(skr_config.GENCODE_HUMAN)
     cache_path = '../cache/' + fasta_name
     with open(cache_path, mode='r') as fasta_handle:
         result = compute_normalization_and_frequency(
             fasta_handle, kmer_size, False)
         # The helper returns exactly four values; only the mean is checked.
         mean, std, unnormalized_frequency, names = result
         print('For k=', kmer_size, 'length is', len(mean))
         assert len(mean) == 4**kmer_size
コード例 #2
0
def build_cache_files():
    """Precompute and store normalization data for every precomputed FASTA set.

    For each set returned by ``get_precomputed_fasta_sets()`` and each k-mer
    length from 1 through ``skr_config.MAX_KMER_LENGTH_PRECOMPUTE``, the FASTA
    file under ``CACHE_DIR`` is passed to
    ``compute_normalization_and_frequency`` and the resulting mean, std and
    unnormalized frequency arrays are written with ``np.save``.  The names
    are pickled only once per set, during the first k-mer length processed.

    ``CACHE_DIR`` and one sub-directory per FASTA file (named after the file's
    stem) are created when missing.  When ``VERBOSE`` is set, compute times
    per k-mer length and the aggregate save time per set are printed.
    """

    def _ensure_directory(directory):
        # Create the directory only when nothing exists at that path yet.
        if not os.path.exists(directory):
            os.mkdir(directory)

    _ensure_directory(CACHE_DIR)

    for fasta_set in get_precomputed_fasta_sets():
        if VERBOSE:
            print('Getting ', fasta_set)
        fasta_file = getGenCode.get_unzipped_file_name(fasta_set)
        dir_name = pathlib.PurePath(fasta_file).stem
        _ensure_directory(os.path.join(CACHE_DIR, dir_name))

        if VERBOSE:
            print(dir_name + ' computing normalization took\t', end='')

        names_pending = True
        save_seconds = 0
        max_k = skr_config.MAX_KMER_LENGTH_PRECOMPUTE
        for kmer_length in range(1, max_k + 1):
            # The FASTA file is re-opened for every k-mer length so each
            # computation reads it from the beginning.
            with open(os.path.join(CACHE_DIR, fasta_file),
                      mode='r') as fasta_handle:
                started = time.perf_counter()
                (mean, std, unnormalized_frequency,
                 names) = compute_normalization_and_frequency(
                     fasta_handle, kmer_length, return_normalized=False)
                elapsed = time.perf_counter() - started
                if VERBOSE:
                    print('k=' + str(kmer_length) + ',%.3fs;\t' % elapsed,
                          end='')

                # Everything from here to the end of the block counts
                # towards the aggregate save time.
                started = time.perf_counter()
                arrays_by_type = (
                    ('mean', mean),
                    ('std', std),
                    ('unnormalized_frequency', unnormalized_frequency),
                )
                for type_key, array in arrays_by_type:
                    target = get_file_path_for(
                        fasta_file, kmer_length,
                        CACHE_FILE_TYPES.get(type_key))
                    np.save(target, array)

                if names_pending:
                    names_path = get_file_path_for(
                        fasta_file, kmer_length,
                        CACHE_FILE_TYPES.get('names'))
                    with open(names_path, 'wb') as names_file:
                        pickle.dump(names, names_file)
                    names_pending = False

                save_seconds += time.perf_counter() - started

        if VERBOSE:
            print('\nAggregate save time for ' + dir_name +
                  ' was %.3fs' % save_seconds)
コード例 #3
0
def get_precomputed_frequency_path(comparison_set, kmer_length):
    """Locate the cached frequency and names files for a comparison set.

    Args:
        comparison_set: Value compared against each precomputed set's
            ``server_name``.  ``None`` or an empty value yields ``None``.
        kmer_length: k-mer length passed to ``get_file_path_for``.

    Returns:
        A ``(unnormalized_frequency_path, names_path)`` tuple when a set with
        a matching ``server_name`` has both files on disk; otherwise ``None``.
    """
    if comparison_set is None or len(comparison_set) == 0:
        return None

    for candidate in get_precomputed_fasta_sets():
        if comparison_set != candidate.server_name:
            continue

        fasta_file = getGenCode.get_unzipped_file_name(candidate)
        frequency_path = get_file_path_for(
            fasta_file, kmer_length,
            CACHE_FILE_TYPES.get('unnormalized_frequency'))
        names_path = get_file_path_for(
            fasta_file, kmer_length, CACHE_FILE_TYPES.get('names'))

        # A match with missing files is not an error here: keep scanning.
        both_present = (os.path.exists(frequency_path)
                        and os.path.exists(names_path))
        if both_present:
            return frequency_path, names_path

    return None
コード例 #4
0
def get_precomputed_normalization_path(parameters):
    """Locate the cached mean and std files for the requested normal set.

    Args:
        parameters: Mapping that must contain ``'normal_set'``; when a
            precomputed set matches, ``'kmer_length'`` is read from it too.

    Returns:
        A ``(mean_path, std_path)`` tuple when a set whose ``server_name``
        equals ``parameters['normal_set']`` has both files on disk.
        ``None`` when ``normal_set`` is ``None``/empty or no set matches.

    Raises:
        KeyError: If ``'normal_set'`` is missing, or ``'kmer_length'`` is
            missing once a matching set has been found.
        SeekrServerError: If a matching set is found but its mean or std
            file does not exist for the requested k-mer length.
    """
    normal_set = parameters['normal_set']
    if not normal_set:
        return None

    for fasta_set in get_precomputed_fasta_sets():
        if normal_set != fasta_set.server_name:
            continue

        fasta_file = getGenCode.get_unzipped_file_name(fasta_set)
        kmer_length = parameters['kmer_length']
        mean_path = get_file_path_for(
            fasta_file, kmer_length, CACHE_FILE_TYPES.get('mean'))
        std_path = get_file_path_for(
            fasta_file, kmer_length, CACHE_FILE_TYPES.get('std'))

        if os.path.exists(mean_path) and os.path.exists(std_path):
            return (mean_path, std_path)

        # Unlike the no-match case, a matched set with missing cache files
        # is reported as an error rather than silently returning None.
        raise SeekrServerError(
            'Fasta file <' + fasta_file + '> not found for kmer_length='
            + str(kmer_length))

    return None