예제 #1
0
def index_creation(test_kmers):
    """Build one COBS compact index of the "/ten" directory per k-mer size.

    Every value in ``test_kmers`` is used as the ``term_size`` of its own
    index, which is written to a file named
    ``<value>_ten_index.cobs_compact``. All builds share a false positive
    rate of 0.4 and are allowed to overwrite existing files.

    Args:
        test_kmers: Iterable of k-mer sizes to build an index for.

    Returns:
        None.
    """
    for kmer_size in test_kmers:
        settings = cobs.CompactIndexParameters()
        settings.term_size = kmer_size  # k-mer size
        settings.clobber = True  # overwrite output and temporary files
        # A higher false positive rate gives a smaller index.
        settings.false_positive_rate = 0.4

        index_name = str(kmer_size) + "_ten_index.cobs_compact"
        cobs.compact_construct("/ten", index_name, index_params=settings)
예제 #2
0
def create_index(input_dir, output_dir, kmer_length, fpr):
    """Create a COBS compact index, then exit with status 0.

    The index is written to
    ``<output_dir>/<kmer_length>_index_index.cobs_compact``.

    Args:
        input_dir: Path handed to ``cobs.compact_construct`` as its input.
        output_dir: Directory that receives the index file. A trailing path
            separator is optional.
        kmer_length: k-mer size; stored as the index ``term_size`` and used
            as the prefix of the output file name.
        fpr: False positive rate of the index (higher rate -> smaller index).

    Raises:
        SystemExit: Always, with code 0, once construction has returned.
    """
    params = cobs.CompactIndexParameters()
    params.term_size = kmer_length
    params.clobber = True               # overwrite output and temporary files
    params.false_positive_rate = fpr    # higher false positive rate -> smaller index

    # Join directory and file name with a real separator. Plain concatenation
    # only worked when output_dir already ended in one; otherwise the file was
    # created next to the directory instead of inside it.
    index_name = str(kmer_length) + "_index_index.cobs_compact"
    index_path = os.path.join(output_dir, index_name)

    cobs.compact_construct(os.fspath(input_dir), index_path, index_params=params)

    # NOTE(review): this terminates the whole process, so nothing after a call
    # to create_index() runs. Kept because callers may rely on it.
    sys.exit(0)
def create_indexes_from_kmers(directory):
    """Build a COBS compact index of ``directory`` for a fixed set of k-mer sizes.

    One index per size is written to ``indexes/<size>_index.cobs_compact``
    with a false positive rate of 0.4; existing files are overwritten.

    Args:
        directory: Input path handed to ``cobs.compact_construct``.

    Returns:
        The list of k-mer sizes that were indexed.
    """
    kmer_sizes = [1, 2, 5, 10, 15, 20]

    for size in kmer_sizes:
        index_params = cobs.CompactIndexParameters()
        index_params.term_size = size
        index_params.clobber = True  # overwrite output and temporary files
        # A higher false positive rate gives a smaller index.
        index_params.false_positive_rate = 0.4

        target = f"indexes/{size}_index.cobs_compact"
        cobs.compact_construct(directory, target, index_params=index_params)

    return kmer_sizes
예제 #4
0
    def test_compact_construct_query(self):
        # Round trip: build a compact index from the FASTA test data, then
        # query it and check the result list.
        index_path = datadir + "/python_test.cobs_compact"

        # Build the index; clobber permits overwriting a previous run's file.
        build_params = cobs.CompactIndexParameters()
        build_params.clobber = True
        cobs.compact_construct(
            input=datadir + "/fasta",
            out_file=index_path,
            index_params=build_params,
        )
        self.assertTrue(os.path.isfile(index_path))

        # Search the freshly built index for a single query sequence.
        searcher = cobs.Search(index_path)
        hits = searcher.search("AGTCAACGCTAAGGCATTTCCCCCCTGCCTCCTGCCTGCTGCCAAGCCCT")
        self.assertEqual(len(hits), 7)

        # The first result is expected to be sample1 with a score of 20.
        best = hits[0]
        self.assertEqual(best.doc_name, "sample1")
        self.assertEqual(best.score, 20)
예제 #5
0
# Select every entry of protein_file by position.
# NOTE(review): hundred_thousand_indexes is not created in this part of the
# script; it must already exist as a list before this point.
hundred_thousand_indexes.extend(range(len(protein_file)))

# Collect the cleaned entry and the title for each selected position.
proteins = [cleaned[i] for i in hundred_thousand_indexes]
gene_names = [titles[i] for i in hundred_thousand_indexes]

# Write one text file per entry into FILES/. The first entry with a given
# title is stored as "<title>.txt"; later entries with the same title get
# "<title>;<n>.txt", where n is a single running counter shared by all titles.
duplicate_set = set()
duplicate_count = 1
for gene_name, info in tqdm(zip(gene_names, proteins), total=len(proteins)):
    if gene_name in duplicate_set:
        file_name = f"{gene_name};{duplicate_count}.txt"
        duplicate_count += 1
    else:
        file_name = f"{gene_name}.txt"
        duplicate_set.add(gene_name)

    # The context manager closes the file even if the write fails.
    with open("FILES/" + file_name, "w") as thou:
        thou.write(info)

# Build a compact index with k-mer size 10 over everything in FILES/.
params = cobs.CompactIndexParameters()
params.term_size = 10
params.clobber = True               # overwrite output and temporary files
params.false_positive_rate = 0.01    # higher false positive rate -> smaller index
cobs.compact_construct("FILES/", "INDEX/" + "10_index_index.cobs_compact", index_params=params)