def test_gappy_kernel_bigger_gap(self):
        """Check the dense gappy kernel of one sequence with ``k=1``, ``g=3``.

        The expected result is a single row of 16 float feature counts.
        """
        sequences = [Seq("ACGTCGATGC")]
        gappy_kernel = gk(
            sequences, k=1, t=0, g=3, gapDifferent=False, sparse=False
        )
        expected = np.array(
            [[0, 3, 2, 2, 1, 1, 4, 2, 2, 3, 2, 2, 1, 2, 2, 1]], dtype=float
        )

        # assert_array_equal reports which entries differ when the check fails.
        np.testing.assert_array_equal(gappy_kernel, expected)
    def test_gappy_kernel_flanking(self):
        """Check the dense gappy kernel with ``include_flanking=True``.

        The input mixes upper- and lower-case characters; with
        ``include_flanking=True`` the expected row counts the lower-case
        characters as well (presumably they mark flanking regions -- confirm
        against the ``gk`` documentation).
        """
        sequences = [Seq("ACGTCGatgC")]
        gappy_kernel = gk(
            sequences,
            k=1,
            t=0,
            g=1,
            gapDifferent=False,
            sparse=False,
            include_flanking=True,
        )
        expected = np.array(
            [[0, 1, 2, 1, 1, 0, 2, 1, 1, 2, 0, 2, 0, 2, 2, 0]], dtype=float
        )

        # assert_array_equal reports which entries differ when the check fails.
        np.testing.assert_array_equal(gappy_kernel, expected)
    def test_gappy_kernel_reverse(self):
        """Check the dense gappy kernel of one sequence with ``reverse=True``.

        The expected result is a single row of 16 float feature counts.
        """
        sequences = [Seq("ACGTCGATGC")]
        gappy_kernel = gk(
            sequences,
            k=1,
            t=0,
            g=1,
            gapDifferent=False,
            reverse=True,
            sparse=False,
        )
        expected = np.array(
            [[0, 3, 3, 1, 3, 0, 2, 0, 3, 2, 0, 0, 0, 0, 0, 0]], dtype=float
        )

        # assert_array_equal reports which entries differ when the check fails.
        np.testing.assert_array_equal(gappy_kernel, expected)
    def test_gappy_kernel(self):
        """Check the dense gappy kernel of three sequences (``k=1``, ``g=1``).

        One row of 16 float counts is expected per input sequence.  The second
        and third sequences differ only by a lower-case insertion and are
        expected to produce identical rows.
        """
        sequences = [
            Seq("ACGTCGATGC"),
            Seq("GTCGATAGC"),
            Seq("GTCGaaagATAGC"),
        ]
        gappy_kernel = gk(
            sequences, k=1, t=0, g=1, gapDifferent=False, sparse=False
        )
        expected = np.array(
            [
                [0, 1, 2, 1, 1, 0, 2, 1, 1, 2, 0, 2, 0, 2, 2, 0],
                [1, 1, 1, 1, 1, 0, 1, 0, 1, 2, 0, 2, 1, 1, 2, 0],
                [1, 1, 1, 1, 1, 0, 1, 0, 1, 2, 0, 2, 1, 1, 2, 0],
            ],
            dtype=float,
        )

        # assert_array_equal reports which entries differ when the check fails.
        np.testing.assert_array_equal(gappy_kernel, expected)
    def test_gappy_kernel_sparse(self):
        """Check the gappy kernel returned when ``sparse`` is left at its default.

        The result is compared element-wise against a ``csr_matrix`` built
        from the expected counts; the two match when the inequality matrix
        has no stored entries.
        """
        sequences = [
            Seq("ACGTCGATGC"),
            Seq("GTCGATAGC"),
            Seq("GTCGaaagATAGC"),
        ]
        gappy_kernel = gk(sequences, k=1, t=0, g=1, gapDifferent=False)
        expected = csr_matrix(
            np.array(
                [
                    [0, 1, 2, 1, 1, 0, 2, 1, 1, 2, 0, 2, 0, 2, 2, 0],
                    [1, 1, 1, 1, 1, 0, 1, 0, 1, 2, 0, 2, 1, 1, 2, 0],
                    [1, 1, 1, 1, 1, 0, 1, 0, 1, 2, 0, 2, 1, 1, 2, 0],
                ],
                dtype=float,
            )
        )

        # assertEqual shows the number of mismatching entries on failure.
        mismatches = (expected != gappy_kernel).getnnz()
        self.assertEqual(mismatches, 0)
    def test_gappy_kernel_bigger_k(self):
        """Check the dense gappy kernel of one short sequence with ``k=2``.

        The expected result is a 1 x 256 row that is zero everywhere except
        for a count of 1.0 at five feature indices.
        """
        sequences = [Seq("ACGTCG")]
        gappy_kernel = gk(
            sequences, k=2, t=0, g=1, gapDifferent=False, sparse=False
        )
        expected = np.zeros((1, 256))
        # Feature columns expected to be hit exactly once.
        expected[0, [27, 29, 102, 109, 182]] = 1.0

        # assert_array_equal reports which entries differ when the check fails.
        np.testing.assert_array_equal(gappy_kernel, expected)
Esempio n. 7
0
def MakeKmerGappy(file1,K,T,G):
    """Write the gappy-kernel feature counts of a FASTA file to a CSV file.

    The sequences parsed from ``file1`` are passed to ``gk``.  The resulting
    matrix is serialised with ``scipy.io.mmwrite`` into a scratch file and read
    back entry by entry, which yields ``(row, column, value)`` triples in
    Matrix Market coordinate format.  The counts are then written to
    ``file1 + "_gappy_kmers.csv"``.

    Output layout: a header line ``id,<column indices...>,species`` followed
    by one line per matrix row that has at least one stored entry.  Columns
    appear in the order in which they are first encountered in the Matrix
    Market output; missing entries are written as ``0`` and values are
    truncated to integers.

    Args:
        file1: Path of the input FASTA file; also the prefix of the output
            CSV path.
        K: Converted with ``int()`` and forwarded to ``gk`` as ``k``.
        T: Converted with ``int()`` and forwarded to ``gk`` as ``t``.
        G: Converted with ``int()`` and forwarded to ``gk`` as ``g``.

    Note:
        ``ExtractInfoFasta(file1)`` is indexed with the integer Matrix Market
        row index (which is 1-based) and each value is read at positions 0
        (``id`` column) and 1 (``species`` column).  Presumably it returns a
        mapping keyed by 1-based record number -- confirm against its
        definition.
    """
    sequences = list(SeqIO.parse(file1, "fasta"))
    X1 = gk(sequences, k=int(K), t=int(T), g=int(G))

    KmerDict = OrderedDict()  # row index (str) -> {column index (str): count (str)}
    FirstRow = []             # column indices in first-seen order (CSV header)
    seen_columns = set()      # O(1) membership companion of FirstRow

    # The scratch file is removed automatically when the ``with`` block exits,
    # so no temporary file is left behind on disk.
    with tempfile.NamedTemporaryFile(mode='w+b') as ti:
        scipy.io.mmwrite(ti, X1)
        ti.seek(0)
        size_line_pending = True
        for raw in ti:
            line = raw.decode().strip()
            # Skip the banner / comment lines (they start with '%') and blanks.
            if not line or line.startswith("%"):
                continue
            # The first remaining line holds the matrix dimensions, not data.
            if size_line_pending:
                size_line_pending = False
                continue
            a = line.split()
            row, col = a[0], a[1]
            if col not in seen_columns:
                seen_columns.add(col)
                FirstRow.append(col)
            if row not in KmerDict:
                KmerDict[row] = {}
            # Values are emitted as reals; the CSV stores truncated integers.
            KmerDict[row][col] = str(int(float(a[2])))

    NameDict = ExtractInfoFasta(file1)

    # Open the destination only once all data is ready, so a failure above
    # does not leave an empty CSV behind.
    with open(file1+"_gappy_kmers.csv","w") as dest:
        dest.write("id,"+",".join(FirstRow)+",species")
        dest.write("\n")
        for row, counts in KmerDict.items():
            info = NameDict[int(row)]
            Li = [info[0]]
            Li.extend(counts.get(col, "0") for col in FirstRow)
            Li.append(info[1])
            dest.write(",".join(Li))
            dest.write("\n")