def index_creation(test_kmers, dataset="ten", false_positive_rate=0.4):
    """Build one COBS compact index per k-mer size for a dataset.

    For every k-mer size in ``test_kmers`` the input path ``"/" + dataset``
    is indexed into ``"<k>_<dataset>_index.cobs_compact"`` (a path relative
    to the current working directory).

    Args:
        test_kmers: Iterable of k-mer sizes; one index is built per value.
            An empty iterable builds nothing.
        dataset: Name shared by the input path and the output file name.
            The default ``"ten"`` reproduces the previously hard-coded
            ``"/ten"`` input and ``"<k>_ten_index.cobs_compact"`` output.
        false_positive_rate: Value assigned to the index parameters'
            ``false_positive_rate``; a higher rate gives a smaller index.

    Returns:
        None.
    """
    input_path = "/" + dataset
    for kmer_size in test_kmers:
        params = cobs.CompactIndexParameters()
        params.term_size = kmer_size  # k-mer size
        params.clobber = True  # overwrite output and temporary files
        params.false_positive_rate = false_positive_rate
        cobs.compact_construct(
            input_path,
            str(kmer_size) + "_" + dataset + "_index.cobs_compact",
            index_params=params,
        )
def create_index(input_dir, output_dir, kmer_length, fpr):
    """Create a COBS compact index, then exit the process.

    The index is written to
    ``<output_dir>/<kmer_length>_index_index.cobs_compact``.

    Args:
        input_dir: Input path handed to ``cobs.compact_construct``.
        output_dir: Directory that receives the index file. A trailing
            path separator is optional.
        kmer_length: k-mer size, used as the index term size.
        fpr: False positive rate of the index; a higher rate gives a
            smaller index.

    Raises:
        SystemExit: Always, with status 0, once construction has returned.
    """
    params = cobs.CompactIndexParameters()
    params.term_size = kmer_length
    params.clobber = True  # overwrite output and temporary files
    params.false_positive_rate = fpr  # higher false positive rate -> smaller index

    # Join with a real path separator: plain string concatenation silently
    # produced "<output_dir><k>_index_index.cobs_compact" (a sibling of the
    # directory) whenever output_dir had no trailing slash.
    index_path = os.path.join(
        output_dir, str(kmer_length) + "_index_index.cobs_compact"
    )
    cobs.compact_construct(os.fspath(input_dir), index_path, index_params=params)

    # NOTE(review): exiting here terminates the whole interpreter; kept
    # because existing callers may rely on it.
    sys.exit(0)
def create_indexes_from_kmers(directory):
    """Build a compact index of ``directory`` for each fixed k-mer size.

    One index is written per k-mer size to
    ``indexes/<k>_index.cobs_compact``, using a false positive rate of 0.4.

    Args:
        directory: Input path handed to ``cobs.compact_construct``.

    Returns:
        The list of k-mer sizes that were indexed.
    """
    kmer_sizes = [1, 2, 5, 10, 15, 20]

    for size in kmer_sizes:
        settings = cobs.CompactIndexParameters()
        settings.term_size = size
        # Overwrite output and temporary files left by earlier runs.
        settings.clobber = True
        # A higher false positive rate yields a smaller index.
        settings.false_positive_rate = 0.4

        index_path = "indexes/" + str(size) + "_index.cobs_compact"
        cobs.compact_construct(directory, index_path, index_params=settings)

    return kmer_sizes
def test_compact_construct_query(self):
    """Construct a compact index from ``datadir + "/fasta"`` and query it.

    Checks that the index file exists after construction, that the query
    yields 7 results, and that the first result is ``sample1`` with a
    score of 20.
    """
    index_path = datadir + "/python_test.cobs_compact"

    # Build the compact index, replacing any file from an earlier run.
    build_params = cobs.CompactIndexParameters()
    build_params.clobber = True
    cobs.compact_construct(
        input=datadir + "/fasta",
        out_file=index_path,
        index_params=build_params,
    )
    self.assertTrue(os.path.isfile(index_path))

    # Query the freshly built index.
    searcher = cobs.Search(index_path)
    hits = searcher.search("AGTCAACGCTAAGGCATTTCCCCCCTGCCTCCTGCCTGCTGCCAAGCCCT")
    self.assertEqual(len(hits), 7)

    top_hit = hits[0]
    self.assertEqual(top_hit.doc_name, "sample1")
    self.assertEqual(top_hit.score, 20)
# Register every record of protein_file by its position.
hundred_thousand_indexes.extend(range(len(protein_file)))

# Look up the content and the name of each registered record.
proteins = [cleaned[index] for index in hundred_thousand_indexes]
gene_names = [titles[index] for index in hundred_thousand_indexes]

# Write one text file per record. The first record with a given name goes
# to "FILES/<name>.txt"; any later record with that name goes to
# "FILES/<name>;<n>.txt", where <n> is a counter shared by all duplicates.
duplicate_set = set()
duplicate_count = 1
for gene_name, info in tqdm(zip(gene_names, proteins), total=len(gene_names)):
    if gene_name in duplicate_set:
        suffix = ";" + str(duplicate_count)
    else:
        suffix = ""

    # The context manager closes the file even when the write fails.
    with open("FILES/" + gene_name + suffix + ".txt", "w") as out_file:
        out_file.write(info)

    # Update the bookkeeping only after the file was written successfully.
    if suffix:
        duplicate_count += 1
    else:
        duplicate_set.add(gene_name)

# Build a single compact index over all files written above.
params = cobs.CompactIndexParameters()
params.term_size = 10
params.clobber = True  # overwrite output and temporary files
params.false_positive_rate = 0.01  # higher false positive rate -> smaller index
cobs.compact_construct("FILES/", "INDEX/" + "10_index_index.cobs_compact", index_params=params)