Exemple #1
0
# Prepare for Clustering
#===============================================================================

# Split the reads into one file per group, so that each group is clustered
# on its own.
files2cluster, path = Preprocess.trim_reads(mode='separate', n=1)

#===============================================================================
# Cluster Data
#===============================================================================

# Clustering parameters
#------------------------------------------------------------------------------
# Default variables, handed to ClusterClass through its `defaults` argument.
default_vars = dict(c_thresh=0.90, n_filter=8, maskN=False)

# Variables to change: one dictionary per clustering run.
run_parameters = [
    dict(c_thresh=1.0),
    dict(c_thresh=0.90),
]

Cluster = ClusterClass(infiles=files2cluster, inpath=path, defaults=default_vars)
# `c` and `db` are not defined in this section; they must already exist in
# the enclosing scope when this runs.
Cluster.c = c
Cluster.db = db

# Run the whole batch, one run per entry in run_parameters.
out_list = Cluster.run_batch_cdhit_clustering(run_parameters, threads=1)

## Display Summary
Exemple #2
0
# Build a small FASTA file holding one reference sequence plus four
# single-indel variants of it, cluster the file, and print the output.

# Reference sequence (presumably 100 random bases -- confirm against
# seq_generator, which is defined elsewhere).
seq1 = Seq(seq_generator(100))

# Each variant keeps the same length as seq1: a deletion is padded with one
# extra base at the end, an insertion drops the final base.
seq2 = seq1[1:] + "T"                   # base at index 0 removed
seq3 = seq1[:50] + seq1[51:] + "G"      # base at index 50 removed
seq4 = seq1[:25] + "T" + seq1[25:-1]    # "T" inserted before index 25
seq5 = seq1[:75] + "A" + seq1[75:-1]    # "A" inserted before index 75

seqR1 = SeqRecord(seq1, id="original")
seqR2 = SeqRecord(seq2, id="deletion0")
seqR3 = SeqRecord(seq3, id="deletion50")
seqR4 = SeqRecord(seq4, id="insertion25")
seqR5 = SeqRecord(seq5, id="insertion75")

# Open the file only once the records exist, and let the context manager
# close it even if the write raises.
with open("indel_test.fasta", "w") as f:
    SeqIO.write([seqR1, seqR2, seqR3, seqR4, seqR5], f, "fasta")

path = os.getcwd()

params = {
    "c_thresh": 0.90,
    "n_filter": 8,
    "threads": 1,
    "mem": 0,
    "maskN": False,
    "outfile_postfix": "-clustered",
}

C = ClusterClass(infiles="indel_test.fasta", inpath=path, defaults=params)
out = C.run_single_cdhit_clustering()

# out[0] is opened as a file path here; show its contents.
with open(out[0], "r") as outf:
    for line in outf:
        # Parenthesised so the statement parses on both Python 2 and 3.
        # Each line keeps its own newline, so the output is double-spaced,
        # exactly as before.
        print(line)