예제 #1
1
    def test_lcs_kosa_koza(self):
        # 'kosa' and 'koza' differ in one position, so exactly one longest
        # common subsequence is expected: k, o, a.
        expected = [list('koa')]

        self.assertEqual(lcs.lcs('kosa', 'koza'), expected)
예제 #2
1
    def test_lcs_abcd_acbd(self):
        # Two equally long common subsequences are expected, in this order:
        # a-c-d first, then a-b-d.
        expected = [list('acd'), list('abd')]

        self.assertEqual(lcs.lcs('abcd', 'acbd'), expected)
예제 #3
0
    def test_lcs_empty_acbd(self):
        # With an empty first sequence the expected result is a list holding
        # a single empty subsequence.
        expected = [[]]

        self.assertEqual(lcs.lcs('', 'acbd'), expected)
예제 #4
0
    def solve2(cls,Str,len1,len2):
        """Repeatedly cut the match reported by ``lcs`` out of both strings.

        ``Str`` is a two-element mutable sequence of strings and is modified
        in place. Each round calls ``lcs(Str[0], Str[1])``, which is unpacked
        as ``(length, start_in_first, start_in_second)``. A match shorter than
        4 ends the loop. Otherwise the matched span is removed from both
        strings and a ``"$"`` is left in its place, unless the span reached
        the end of the string or is already followed by ``"$"``. The loop
        continues only while the last match was longer than 4.

        ``len1`` and ``len2`` are accepted but not used.

        Returns the sum of the lengths of all removed matches.
        """
        min_match = 4
        total = 0
        while True:
            length, start0, start1 = lcs(Str[0], Str[1])
            if length < min_match:
                break
            for idx, start in ((0, start0), (1, start1)):
                text = Str[idx]
                end = start + length
                # Mark the cut with "$" unless the cut is at the very end or
                # a marker already follows it.
                if end < len(text) and text[end] != '$':
                    marker = "$"
                else:
                    marker = ""
                Str[idx] = text[0:start] + marker + text[end:]
            total += length
            if length <= min_match:
                break
        return total
예제 #5
0
 def lcs_call(self, widget):
     """Run ``lcs`` on the two text fields and show the result in a dialog.

     ``widget`` is accepted (signal-handler style) but not used.
     """
     common = lcs(self.txt_grpa.get_text(), self.txt_grpb.get_text())
     popup = DialogWindow(self, common)
     popup.run()
     popup.destroy()
예제 #6
0
def main(filepath):
    """Report how many distinct dataset entries contain each frequent pattern.

    Reads the JSON file at ``filepath`` and takes its ``"dataset"`` list.
    For every pair of distinct entries, each result of ``lcs.lcs`` with at
    least 3 elements is joined into a string and counted. The counted
    strings are fed to ``suffixtree2.ST`` and ``print_tree(..., 3)`` yields
    the candidate patterns. For each candidate, the number of distinct
    entries for which ``check(entry, candidate)`` is true is printed, highest
    count first.

    Args:
        filepath: path to a JSON file with a top-level ``"dataset"`` key.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(filepath) as handle:
        dataset = json.load(handle)["dataset"]

    original = {}
    for d in dataset:
        original[d] = original.get(d, 0) + 1
    # A real list is needed: dict views cannot be indexed on Python 3.
    keys = list(original)

    sequences = {}
    for i, first in enumerate(keys):
        for second in keys[i + 1:]:
            for l in lcs.lcs(first, second):
                if len(l) < 3:
                    continue
                str_result = "".join(l)
                sequences[str_result] = sequences.get(str_result, 0) + 1

    h = suffixtree2.ST(sequences)
    alldata = h.print_tree(h.get(), 3)

    result = {}
    # Iterate in descending score order so that ties in the final sort keep
    # this ordering (sorted() is stable).
    for k, _ in sorted(alldata.items(), key=lambda x: x[1], reverse=True):
        result[k] = sum(1 for s in keys if check(s, k) == True)

    for k, v in sorted(result.items(), key=lambda x: x[1], reverse=True):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(k + " was included in " + str(v) + " data.")
예제 #7
0
파일: fmes.py 프로젝트: CoastED/coasted
def compare_value(value1, value2):
    """Return a dissimilarity score for two optional sized values.

    * both ``None``        -> ``0.0``
    * exactly one ``None`` -> ``1.0``
    * both empty           -> ``0.0`` (identical; avoids dividing by zero)
    * otherwise            -> ``1 - len(lcs) / max(len(value1), len(value2))``
      where ``lcs`` is ``lcs.lcs(lcs.path(value1, value2))``.
    """
    if value1 is None and value2 is None:
        return 0.0
    if value1 is None or value2 is None:
        return 1.0
    longest = max(len(value1), len(value2))
    if longest == 0:
        # Two empty values are identical; without this guard the division
        # below raises ZeroDivisionError.
        return 0.0
    return 1.0 - float(len(lcs.lcs(lcs.path(value1, value2)))) / longest
예제 #8
0
def alignchildren(t1, t2, M, E, w, x):
  """
  Align the children of the node pair (w, x). See figure 9 in reference.

  All children of both nodes are first flagged out of order. The children
  of w whose partner (per M.left) has parent x, and the children of x whose
  partner (per M.right) has parent w, are then compared with
  lcs.lcs(lcs.path(...)), which yields index pairs into those two lists.
  Pairs on that common subsequence are flagged in order. Every other pair
  present in M gets a move recorded on E and applied to t1, and is then
  flagged in order too. t2 is accepted but not used.
  """
  for node in w.elements():
    node.inorder = False
  for node in x.elements():
    node.inorder = False

  w_kids = [kid for kid in w.elements()
            if kid in M.left and M.left[kid].parent == x]
  x_kids = [kid for kid in x.elements()
            if kid in M.right and M.right[kid].parent == w]

  def is_matched(a, b):
    return (a, b) in M

  common = lcs.lcs(lcs.path(w_kids, x_kids, is_matched))
  in_order = [(w_kids[i], x_kids[j]) for i, j in common]
  for left, right in in_order:
    left.inorder = right.inorder = True

  for left in w_kids:
    for right in x_kids:
      pair = (left, right)
      # Only matched pairs that are not already on the common subsequence
      # need to be moved.
      if pair not in M or pair in in_order:
        continue
      k = findpos(M, right)
      E.move(left.path(), w.path(), k)
      t1.move(left.path(), w.path(), str(k))
      left.inorder = right.inorder = True
예제 #9
0
파일: fmes.py 프로젝트: CoastED/coasted
def alignchildren(t1, t2, M, E, w, x):
    """
    Put the matched children of w and x into the same relative order.
    See figure 9 in reference.

    Steps, as performed by the code: reset ``inorder`` on every child of w
    and x; collect the children of each node whose partner in M lives under
    the other node; take the index pairs returned by
    ``lcs.lcs(lcs.path(...))`` as the already-aligned pairs and flag them;
    for each remaining pair found in M, look up a position with ``findpos``,
    record a move on E, apply it to t1 and flag the pair. t2 is unused.
    """
    for child in w.elements():
        child.inorder = False
    for child in x.elements():
        child.inorder = False

    from_w = []
    for child in w.elements():
        if child in M.left and M.left[child].parent == x:
            from_w.append(child)
    from_x = []
    for child in x.elements():
        if child in M.right and M.right[child].parent == w:
            from_x.append(child)

    def in_matching(a, b):
        return (a, b) in M

    aligned = []
    for i, j in lcs.lcs(lcs.path(from_w, from_x, in_matching)):
        aligned.append((from_w[i], from_x[j]))
    for first, second in aligned:
        first.inorder = second.inorder = True

    for first in from_w:
        for second in from_x:
            needs_move = (first, second) in M and (first, second) not in aligned
            if needs_move:
                k = findpos(M, second)
                E.move(first.path(), w.path(), k)
                t1.move(first.path(), w.path(), str(k))
                first.inorder = second.inorder = True
예제 #10
0
파일: go.py 프로젝트: kazarin1alex/lit
    def sorted_active_runnable(self, query, hwnds):
        """Return the tasks for ``hwnds`` ordered for the given ``query``.

        Runs under ``self.mutex``. Tasks are refreshed first, then:

        * with an empty ``query`` they are sorted by ``usetime``, most
          recent first;
        * otherwise each task is scored by
          ``task.query.distance_to(title) * 10 ** len(query)`` on its
          lower-cased full name. The leading run of tasks whose score equals
          the first task's score is reduced by ``len(lcs(query, title))``.
          Tasks are returned in ascending score order.

        An empty ``hwnds`` yields an empty list in both cases.
        """
        with QMutexLocker(self.mutex):
            # update query and collect active ones
            self._refresh_tasks(hwnds, query)
            active_tasks = [self.tasks[h] for h in hwnds]

            # sort by last use
            if not query:
                return sorted(active_tasks,
                              key=lambda t: t.usetime,
                              reverse=True)

            # Nothing to rank: without this guard ds[0] below raises
            # IndexError when there are no active tasks.
            if not active_tasks:
                return []

            titles = [task.fullname.lower() for task in active_tasks]
            scale = 10**len(query)
            ds = [
                task.query.distance_to(title) * scale
                for task, title in zip(active_tasks, titles)
            ]
            best = ds[0]

            # Tie-break only the leading run of scores equal to the first
            # one; stop at the first score that differs.
            for i, title in enumerate(titles):
                if ds[i] != best:
                    break
                ds[i] -= len(lcs(query, title))

            ranked = sorted(enumerate(active_tasks),
                            key=lambda pair: ds[pair[0]])
            return [task for _, task in ranked]
 def test_lcs(self):
     # length_lcs returns (table, length); lcs then rebuilds the
     # subsequence from that table. Expected here: length 2, ['s', 'i'].
     first, second = 'shirish', 'sigdyal'
     table, lcs_len = length_lcs(first, second, len(first), len(second))
     self.assertEqual(lcs_len, 2)
     self.assertListEqual(lcs(first, second, table, lcs_len), ['s', 'i'])
예제 #12
0
def image_fisher_featurize(im_key, gmm_sift, gmm_lcs, pca_sift, pca_lcs):
    """Download one image and build its stacked SIFT + LCS Fisher vector.

    The object ``im_key`` is fetched from the "pictureweb" S3 bucket to
    ``/tmp/img.jpg``. The file is read once with ``flatten=True`` for
    ``sift`` and once as-is for ``lcs``. Each descriptor set is projected
    with the transposed PCA matrix, cast to float32 and passed to
    ``fisher.fisher_vector_features`` together with the unpacked GMM tuple.

    Returns:
        ``(features, seconds)``: the transposed horizontal stack of both
        Fisher vectors and the elapsed wall-clock time.
    """
    started = time.time()
    bucket = boto3.resource('s3').Bucket("pictureweb")
    bucket.download_file(im_key, "/tmp/img.jpg")

    flat_image = scipy.misc.imread("/tmp/img.jpg", flatten=True)
    sift_descs = sift(flat_image).dot(pca_sift.T)
    sift_features = fisher.fisher_vector_features(
        sift_descs.astype('float32'), *gmm_sift)

    raw_image = scipy.misc.imread("/tmp/img.jpg")
    lcs_descs = lcs(raw_image).reshape(-1, 96).dot(pca_lcs.T)
    lcs_features = fisher.fisher_vector_features(
        lcs_descs.astype('float32'), *gmm_lcs)

    stacked = np.hstack((sift_features, lcs_features)).T
    finished = time.time()
    return stacked, finished - started
예제 #13
0
파일: try.py 프로젝트: YLAsce/oj
def solve2(Str, len1, len2):
    """Strip long common matches from copies of two strings and total them.

    Works on copies of ``Str[0]`` and ``Str[1]``; the caller's ``Str`` is not
    modified. Each round calls ``lcs`` on the two working copies and unpacks
    ``(k, s, t)``, used here as the match length and its start offsets in
    the first and second copy. A match shorter than 4 ends the loop.
    Otherwise the span is cut from both copies, leaving a ``"$"`` in its
    place unless the cut reaches the end or a ``"$"`` already follows it.
    The loop continues only while the last match was longer than 4.

    ``len1`` and ``len2`` are accepted but not used. The number of rounds
    and a timing placeholder (always 0) are printed before returning.

    Returns the sum of the lengths of all removed matches.
    """
    code0 = copy.deepcopy(Str[0])
    code1 = copy.deepcopy(Str[1])
    ans = 0
    MML = 4  # minimum match length worth removing
    whileTime = 0
    td = 0
    while True:
        whileTime += 1

        # Search the working copies. Searching the untouched Str[0]/Str[1]
        # returns the same match every round and never terminates once a
        # match longer than MML exists.
        k, s, t = lcs(code0, code1)
        if k < MML:
            break

        if s + k < len(code0) and code0[s + k] != '$':
            code0 = code0[0:s] + "$" + code0[s + k:]
        else:
            code0 = code0[0:s] + code0[s + k:]
        if t + k < len(code1) and code1[t + k] != '$':
            code1 = code1[0:t] + "$" + code1[t + k:]
        else:
            code1 = code1[0:t] + code1[t + k:]

        ans += k
        if k <= MML:
            break

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(whileTime)
    print(td)
    return ans
예제 #14
0
파일: try.py 프로젝트: YLAsce/oj
def solve2(Str,len1,len2):
    """Sum the lengths of common matches greedily removed from two strings.

    ``Str[0]`` and ``Str[1]`` are copied, so the caller's list is left
    untouched. On every pass ``lcs`` is run on the two copies and its result
    is unpacked as ``(k, s, t)``: a match length plus the start offset in
    each copy. Passes stop when ``k`` is below 4. Otherwise the span is
    removed from each copy and replaced by ``"$"``, except when the span
    ends the string or is already followed by ``"$"``. Another pass runs
    only if ``k`` was greater than 4.

    ``len1`` and ``len2`` are not used. The pass count and ``td`` (always 0)
    are printed before the total is returned.
    """
    code0 = copy.deepcopy(Str[0])
    code1 = copy.deepcopy(Str[1])
    ans = 0
    MML = 4  # matches shorter than this are ignored
    whileTime = 0
    td = 0
    MaxLen = MML + 1
    while MaxLen > MML:
        whileTime += 1
        MaxLen = MML

        # Bug fix: match against the shrinking copies. Matching against
        # Str[0]/Str[1] kept finding the same span and looped forever for
        # any match longer than MML.
        k, s, t = lcs(code0, code1)
        if k < MML:
            break

        tail0 = code0[s + k:]
        sep0 = "$" if tail0 and tail0[0] != '$' else ""
        code0 = code0[0:s] + sep0 + tail0

        tail1 = code1[t + k:]
        sep1 = "$" if tail1 and tail1[0] != '$' else ""
        code1 = code1[0:t] + sep1 + tail1

        ans += k
        MaxLen = k

    # print(x) with one argument works on both Python 2 and Python 3.
    print(whileTime)
    print(td)
    return ans
예제 #15
0
def solve2(Str, len1, len2):
    """Remove successive matches from ``Str[0]``/``Str[1]`` in place.

    ``lcs(Str[0], Str[1])`` is unpacked as ``(length, start0, start1)``.
    While the reported length is at least 4, the span is cut out of both
    strings (a ``"$"`` is left behind unless the cut ends the string or a
    ``"$"`` already follows) and the length is added to the total. The loop
    stops after a match of exactly 4 or when a shorter one is reported.

    ``len1`` and ``len2`` are accepted but not used.

    Returns the accumulated match length.
    """
    threshold = 4

    def cut(text, start, length):
        # Drop text[start:start+length]; keep a "$" marker where needed.
        stop = start + length
        if stop < len(text) and text[stop] != '$':
            return text[0:start] + "$" + text[stop:]
        return text[0:start] + text[stop:]

    removed = 0
    longest = threshold + 1
    while longest > threshold:
        longest, start0, start1 = lcs(Str[0], Str[1])
        if longest < threshold:
            break
        Str[0] = cut(Str[0], start0, longest)
        Str[1] = cut(Str[1], start1, longest)
        removed += longest
    return removed
예제 #16
0
파일: modelsTest.py 프로젝트: YLAsce/oj
def solve2(Str,len1,len2):
    """Greedy in-place removal of matches shared by ``Str[0]`` and ``Str[1]``.

    Each round asks ``lcs`` for ``(size, at0, at1)`` on the current strings.
    A size below 4 ends the work. Otherwise the span is sliced out of both
    strings, with ``"$"`` inserted at the cut when the cut is not at the end
    and no ``"$"`` follows, and the size is added to the running total.
    Another round runs only when the size exceeded 4.

    ``len1`` and ``len2`` are accepted but not used.

    Returns the running total.
    """
    floor = 4
    total = 0
    again = True
    while again:
        size, at0, at1 = lcs(Str[0], Str[1])
        if size < floor:
            break

        first = Str[0]
        rest = first[at0 + size:]
        glue = "$" if (at0 + size < len(first) and first[at0 + size] != '$') else ""
        Str[0] = first[0:at0] + glue + rest

        second = Str[1]
        rest = second[at1 + size:]
        glue = "$" if (at1 + size < len(second) and second[at1 + size] != '$') else ""
        Str[1] = second[0:at1] + glue + rest

        total += size
        again = size > floor
    return total
예제 #17
0
def calculate_lcs(img_keys, out_matrix, block_idx, descs_per_img=16):
    """Sample LCS descriptors from a batch of images into one matrix block.

    The NumPy RNG is seeded with ``block_idx``. Each key is downloaded from
    the "pictureweb" S3 bucket to ``/tmp/img.jpg``, read, cast to float32
    and passed to ``lcs.lcs``. The result is reshaped to rows of
    ``LCS_DESC_LENGTH`` and ``descs_per_img`` row indices are drawn with
    ``np.random.choice``. The stacked samples are written with
    ``out_matrix.put_block(..., block_idx, 0)``.

    Returns the elapsed wall-clock seconds.
    """
    sampled = []
    s3_resource = boto3.resource('s3')
    import time
    started = time.time()
    np.random.seed(block_idx)
    for key in img_keys:
        s3_resource.Bucket("pictureweb").download_file(key, "/tmp/img.jpg")
        pixels = scipy.misc.imread("/tmp/img.jpg").astype('float32')
        all_descs = lcs.lcs(pixels).reshape(-1, LCS_DESC_LENGTH)
        rows = np.random.choice(all_descs.shape[0], descs_per_img)
        sampled.append(all_descs[rows, :])
    out_matrix.put_block(np.vstack(sampled), block_idx, 0)
    finished = time.time()
    return finished - started
예제 #18
0
def _match(nodes1, nodes2, M, equal):
  """
  Add matching node pairs to M, one label at a time.

  For each distinct label (utils.nub over both node lists) the two chains
  from get_chain are compared with lcs.lcs(lcs.path(...)), which yields
  index pairs. Those pairs are added to M and removed from the chains.
  Each node left in the first chain is then paired with the first node left
  in the second chain for which equal(...) is true, and that second node is
  removed so it cannot be paired again.
  """
  all_nodes = nodes1 + nodes2
  labels = utils.nub([n.label for n in all_nodes])
  for label in labels:

    chain1 = get_chain(nodes1, label)
    chain2 = get_chain(nodes2, label)

    common = lcs.lcs(lcs.path(chain1, chain2, equal))

    for i, j in common:
      M.add((chain1[i], chain2[j]))
    # Pop from the back so the earlier indices stay valid.
    for i, j in reversed(common):
      chain1.pop(i)
      chain2.pop(j)

    for left in chain1:
      for j, right in enumerate(chain2):
        if equal(left, right):
          M.add((left, right))
          chain2.pop(j)
          break
예제 #19
0
파일: fmes.py 프로젝트: CoastED/coasted
def _match(nodes1, nodes2, M, equal):
    """Populate ``M`` with pairs of equal nodes, grouped by label.

    Per label: build both chains with ``get_chain``, take the index pairs
    returned by ``lcs.lcs(lcs.path(chain1, chain2, equal))``, add those node
    pairs to ``M`` and drop them from the chains. Afterwards, greedily pair
    every remaining node of the first chain with the first remaining node of
    the second chain that ``equal`` accepts.
    """
    combined = nodes1 + nodes2
    for label in utils.nub([item.label for item in combined]):

        rest1 = get_chain(nodes1, label)
        rest2 = get_chain(nodes2, label)

        pairs = lcs.lcs(lcs.path(rest1, rest2, equal))

        for a, b in pairs:
            M.add((rest1[a], rest2[b]))
        # Remove in reverse so earlier indices are not shifted.
        for a, b in reversed(pairs):
            rest1.pop(a)
            rest2.pop(b)

        for candidate in rest1:
            hit = next((pos for pos, other in enumerate(rest2)
                        if equal(candidate, other)), None)
            if hit is not None:
                M.add((candidate, rest2[hit]))
                rest2.pop(hit)
예제 #20
0
파일: go.py 프로젝트: Answeror/lit
    def sorted_active_runnable(self, query, hwnds):
        """Return the tasks behind ``hwnds``, ranked against ``query``.

        The work is done while holding ``self.mutex``. After refreshing the
        tasks:

        * an empty ``query`` sorts by ``usetime``, newest first;
        * a non-empty ``query`` scores each task as
          ``task.query.distance_to(title) * 10 ** len(query)`` using its
          lower-cased ``fullname``. The leading tasks whose score equals the
          first score get ``len(lcs(query, title))`` subtracted. The result
          is in ascending score order.

        When ``hwnds`` is empty the result is an empty list.
        """
        with QMutexLocker(self.mutex):
            # update query and collect active ones
            self._refresh_tasks(hwnds, query)
            active_tasks = [self.tasks[h] for h in hwnds]

            # sort by last use
            if not query:
                return sorted(active_tasks, key=lambda t: t.usetime, reverse=True)

            # Guard: ds[0] below raises IndexError on an empty task list.
            if not active_tasks:
                return []

            titles = [task.fullname.lower() for task in active_tasks]
            factor = 10 ** len(query)
            ds = [task.query.distance_to(title) * factor
                  for task, title in zip(active_tasks, titles)]
            best = ds[0]

            # Apply the tie-break to the leading run of equal scores only.
            for i, title in enumerate(titles):
                if ds[i] != best:
                    break
                ds[i] -= len(lcs(query, title))

            return [task for _, task in sorted(enumerate(active_tasks), key=lambda pair: ds[pair[0]])]
예제 #21
0
def similarity(x, y):
    """
    similarity(x, y)

    Measure string similarity between x and y.

    Returns 0 when y is empty, or when the shorter-to-longer length ratio
    of the two inputs is below 0.75 (they are then deemed different terms).
    Otherwise returns:
    0.8 * len(lcs.lcs(x, y)) / max(len(x), len(y)) +
    0.2 / levenshtein.dameraulevenshtein(x, y)
    where a distance of 0 is treated as 1.

    The original author notes that the two measures were found, by trial
    and error, to compensate for each other's errors.
    """
    min_length_ratio = 0.75

    if len(y) == 0:
        return 0
    length_ratio = 1.0 * len(x) / len(y)
    # Fold ratios above 1 back into (0, 1] so the argument order is irrelevant.
    length_ratio = 1 / length_ratio if length_ratio > 1 else length_ratio
    if length_ratio < min_length_ratio:
        return 0

    common = lcs.lcs(x, y)
    edits = levenshtein.dameraulevenshtein(x, y)
    edits = 1 if edits == 0 else edits

    lcs_term = 0.8 * (1.0 * len(common) / max(len(x), len(y)))
    distance_term = 0.2 * 1.0 / edits
    return lcs_term + distance_term
예제 #22
0
import numpy as np
import lcs
import time
import io
import boto3
import scipy.misc
import fisher
import pywren

# Scratch script: fetch one image from S3, load the LCS PCA/GMM arrays and
# compute LCS descriptors for the image.
t = time.time()  # start timestamp; not read again in the visible part
im_key = "imagenet_train_100k_pixels/8/11857.JPEG"

# Read the object body from the "pictureweb" bucket into an in-memory buffer.
client = boto3.client('s3')
bio = io.BytesIO(client.get_object(Bucket="pictureweb", Key=im_key)["Body"].read())

# PCA matrix and the three GMM parameter arrays (file names carry "16"),
# all loaded from .npy files in the current working directory.
pca_mat_lcs = np.load("pca_mat_lcs.npy")
weights = np.load('gmm_lcs_weights_{0}_centers.npy'.format(16))
means = np.load('gmm_lcs_means_{0}_centers.npy'.format(16))
covars= np.load('gmm_lcs_covars_{0}_centers.npy'.format(16))
# Tuple order is (means, covars, weights).
gmm_lcs = (means, covars, weights)

# Decode the buffer with flatten=True, then reshape the descriptors returned
# by lcs.lcs into rows of 96 values.
img = scipy.misc.imread(bio, flatten=True)
pwex = pywren.default_executor()  # created but not used in the visible part
z = lcs.lcs(img).reshape(-1, 96)






예제 #23
0
파일: test.py 프로젝트: xiaochaowei/isp
	print flag
	flag = flag+ 1
	machine_id = row[0]
	isp = row[2]
	country = row[1]
	if isp == "":
		continue
	if machineList.has_key(machine_id):
		if isp in machineList[machine_id]:
			continue	
		else:
			index = -1
			max_sim = -1
			for isp_idx in range(0, len(machineList[machine_id])):
				isp_sub = machineList[machine_id][isp_idx]
				common_len = lcs.lcs(isp, isp_sub)
				sim = float(common_len) / max(len(isp), len(isp_sub))
				#print sim
				#print isp_sub, isp
				if sim > eps and max_sim < sim:
					index = isp_idx
					max_sim = sim
			if index != -1:
				#update
				if len(isp) < len(machineList[machine_id][index]):
					   	machineList[machine_id][index] = isp
						comment_sql = UPDATE_SQL.format(newisp = isp, machine_id = machine_id,  isp = machineList[machine_id][index])
						cursor.execute(comment_sql)
						conn.commit()
				else:
					continue
예제 #24
0
파일: annotate.py 프로젝트: shenki/fpos
def normal_lcs(a, b, la=None, lb=None):
    """Return ``2 * lcs(a, b) / (la + lb)``.

    ``la`` and ``lb`` are optional precomputed lengths. A falsy value
    (``None`` or ``0``) is replaced by ``len(a)`` / ``len(b)``.
    """
    len_a = la or len(a)
    len_b = lb or len(b)
    return 2 * lcs(a, b) / (len_a + len_b)
예제 #25
0
 def get_longest_substr(self, first_start, first_end, second_start, second_end):
     """Run ``lcs`` on a window of each string and shift the result back.

     The windows are ``self.first[first_start:first_end]`` and
     ``self.second[second_start:second_end]``. The first two values returned
     by ``lcs`` are offset by ``first_start`` and ``second_start``
     respectively; the third is returned unchanged.
     """
     first_window = self.first[first_start:first_end]
     second_window = self.second[second_start:second_end]
     offset_a, offset_b, match_len = lcs(first_window, second_window)
     return first_start + offset_a, second_start + offset_b, match_len
예제 #26
0
파일: test_lcs.py 프로젝트: tkyf/sandbox
 def test_lcs(self):
     # Every case is ((first, second), expected); both case lists are
     # checked the same way, in the same order as before.
     for attr in ("cases", "cases_without_recursive"):
         for case in getattr(self, attr):
             inputs, expected = case[0], case[1]
             self.assertEqual(lcs.lcs(inputs[0], inputs[1]), expected)
예제 #27
0
import concurrent.futures as fs
import numpy as np
import lcs
from numpy import ascontiguousarray as C_ARRAY


def convert_keystone_csv_to_numpy(fname):
    """Parse a comma-separated text file of numbers into a 2-D NumPy array.

    Leading and trailing whitespace of the whole file is stripped. Each
    remaining line becomes one row and every comma-separated field is
    converted with ``float``.

    Args:
        fname: path of the text file to read.

    Returns:
        ``np.ndarray`` built from the list of float rows.
    """
    # Use a context manager so the handle is closed even if parsing fails.
    with open(fname) as handle:
        lines = handle.read().strip().split("\n")
    return np.array([[float(x) for x in line.split(",")] for line in lines])


if __name__ == "__main__":
    im = scipy.misc.imread("./ILSVRC2012_val_00000293.JPEG")
    # convert image to BGR
    im = im[:, :, ::-1]
    descs = lcs.lcs(im).reshape(-1, 96)
    descs_mean = descs[:, :48]
    descs_std = descs[:, 48:]
    descs = np.vstack((descs_mean, descs_std)).reshape((-1, 96), order='F').T

    descs_keystone = C_ARRAY(
        convert_keystone_csv_to_numpy("./lcs_imagenet.txt"))
    pca_mat = convert_keystone_csv_to_numpy("./pcaMat_lcs.csv")
    weights = convert_keystone_csv_to_numpy("./gmmCoefs_lcs.csv")
    means = convert_keystone_csv_to_numpy("./gmmMeans_lcs.csv").T
    covars = convert_keystone_csv_to_numpy("./gmmVars_lcs.csv").T
    descs = pca_mat.dot(descs)
    pca_keystone = convert_keystone_csv_to_numpy("./pca_keystone_lcs.txt")
    gmm = (means, covars, weights)
    fv_keystone = convert_keystone_csv_to_numpy("./fisher_keystone_lcs.txt")
    fv_features = fisher.fisher_vector_features(
import lcs

# Two sample strings to compare.
a ="aslkndlkasoihlkn"
b ="asdhklneldbaiubsc"


# Print the result of lcs.lcs and its length, then both input lengths for
# comparison.
c = lcs.lcs(a,b)
print(c)
print(len(c))

print(len(a))
print(len(b))

# # But the Levenshtein distance should be 12, not ( Max(a,b) - lcs )
# a	s	 	l	k	 	n	d	l	k	a	s	o	i	h	l	k	n
# a	s	d	h	k	l	n	e	l	 	d	b	a	i	u	b	s	c
예제 #29
0
 def _select_best(self, articles):
     """Return the article whose title gives the smallest ``lcs`` value.

     Each article's ``'title'`` is passed through ``_em`` and ``gn`` and
     then compared with ``self.gn`` via ``lcs``. ``min`` picks the article
     with the lowest value.
     """
     from gn import gn
     from lcs import lcs

     def score(art):
         return lcs(self.gn, gn(_em(art['title'])))

     return min(articles, key=score)
예제 #30
0
    def calcScore(self, qtList):
        """Score this candidate against the question templates in ``qtList``.

        The common part of ``self.sub`` and ``self.qRaw`` (``lcs.lcs``) is
        located in and removed from the raw question. Unless ``self.pre`` is
        the empty string, the same is done for ``self.pre`` on what remains.
        Depending on ``self.qType`` the remaining template text is split
        into fragments, and each fragment is compared (``Levenshtein.jaro``)
        with the stored fragments in ``qtList``:

        * ``0``: fragments split at subIndex then preIndex; keys
          ``'01'``, ``'02'``, ``'03'``.
        * ``1``: fragments split at preIndex then subIndex; keys
          ``'11'``, ``'12'``, ``'13'``.
        * ``2``: no predicate given; entries of ``qtList['20']`` are parsed
          as ``<before>|||qS|||<after> ===>>> <predicate>`` and the best
          matching predicates from ``self.kbDict`` are collected.

        Side effects: sets ``self.score``; may set ``self.qType``; for
        qType 2 replaces ``self.pre`` with a set of predicates.

        NOTE(review): ``preIndex`` and ``lcsPre`` are bound only when
        ``self.pre != ''``, and the qType 0 branch relies on ``self.qType``
        having been initialised elsewhere -- confirm against the class.

        Returns ``self.score``.
        """
        # Locate the subject inside the raw question and strip it out.
        lcsSub = lcs.lcs(self.sub, self.qRaw)
        subIndex = self.qRaw.index(lcsSub)
        qTemplate = self.qRaw.replace(lcsSub, '')
        if self.pre == '':
            # No predicate supplied: handled by the qType 2 branch below.
            self.qType = 2
        else:
            # Same treatment for the predicate, on the subject-less text.
            lcsPre = lcs.lcs(self.pre, qTemplate)
            preIndex = qTemplate.index(lcsPre)
            qTemplate = qTemplate.replace(lcsPre, '')
            if preIndex < subIndex:
                # Predicate comes before the subject.
                self.qType = 1

        if self.qType == 0:
            # Subject first: split at subIndex, then preIndex.
            qt01 = qTemplate[:subIndex]
            qt02 = qTemplate[subIndex:preIndex]
            qt03 = qTemplate[preIndex:]
            mSqt01 = 0
            mSqt02 = 0
            mSqt03 = 0
            # Best Jaro similarity of each fragment against its stored list.
            for vQt01 in qtList['01']:
                tmp = Levenshtein.jaro(qt01, vQt01)
                if tmp > mSqt01:
                    mSqt01 = tmp
            for vQt02 in qtList['02']:
                tmp = Levenshtein.jaro(qt02, vQt02)
                if tmp > mSqt02:
                    mSqt02 = tmp
            for vQt03 in qtList['03']:
                tmp = Levenshtein.jaro(qt03, vQt03)
                if tmp > mSqt03:
                    mSqt03 = tmp
            # Average of the three fragment scores plus how well the located
            # subject and predicate match the requested ones.
            self.score = (mSqt01 + mSqt02 + mSqt03 + Levenshtein.jaro(
                lcsSub, self.sub) + Levenshtein.jaro(lcsPre, self.pre)) / 5
        if self.qType == 1:
            # Predicate first: split at preIndex, then subIndex.
            qt11 = qTemplate[:preIndex]
            qt12 = qTemplate[preIndex:subIndex]
            qt13 = qTemplate[subIndex:]
            mSqt11 = 0
            mSqt12 = 0
            mSqt13 = 0
            for vQt11 in qtList['11']:
                tmp = Levenshtein.jaro(qt11, vQt11)
                if tmp > mSqt11:
                    mSqt11 = tmp
            for vQt12 in qtList['12']:
                tmp = Levenshtein.jaro(qt12, vQt12)
                if tmp > mSqt12:
                    mSqt12 = tmp
            for vQt13 in qtList['13']:
                tmp = Levenshtein.jaro(qt13, vQt13)
                if tmp > mSqt13:
                    mSqt13 = tmp
            self.score = (mSqt11 + mSqt12 + mSqt13 + Levenshtein.jaro(
                lcsSub, self.sub) + Levenshtein.jaro(lcsPre, self.pre)) / 5

        if self.qType == 2:
            qt20 = qTemplate  # assigned but not read below
            qt21 = qTemplate[:subIndex]
            qt22 = qTemplate[subIndex:]
            mSqt20 = 0
            preResult = set()
            for vQt20 in qtList['20']:
                # Both delimiters are 8 characters long, hence the "+ 8".
                vQt201 = vQt20[:vQt20.index('|||qS|||')]
                vQt202 = vQt20[vQt20.index('|||qS|||') +
                               8:vQt20.index(' ===>>> ')]
                vQt20pre = vQt20[vQt20.index(' ===>>> ') + 8:]
                sTmp20 = Levenshtein.jaro(vQt201, qt21) + Levenshtein.jaro(
                    vQt202, qt22)
                sTmp20pre = 0
                preTmp = set()
                # Collect every predicate tied for the best Jaro similarity
                # with this template's predicate.
                for kb in self.kbDict:
                    for pre in kb:
                        tmp = Levenshtein.jaro(vQt20pre, pre)
                        if tmp > sTmp20pre:
                            sTmp20pre = tmp
                            preTmp = set()
                        if tmp == sTmp20pre:
                            preTmp.add(pre)
                # Four Jaro terms in total: two fragments, predicate, subject.
                sTmp20 = (sTmp20 + sTmp20pre +
                          Levenshtein.jaro(lcsSub, self.sub)) / 4
                # A strictly better template resets the predicate set;
                # templates tied for best add to it.
                if sTmp20 > mSqt20:
                    mSqt20 = sTmp20
                    preResult = set()
                if sTmp20 == mSqt20:
                    for pre in preTmp:
                        preResult.add(pre)
            self.pre = preResult
            self.score = mSqt20
        return self.score
예제 #31
0
 def test_longest_common_subsequence(self):
     # ((first, second), expected) pairs, checked in order.
     expectations = (
         (("waaaa", "bbbbasfaaewra"), "aaaa"),
         (("abc", "def"), ""),
         (("123", "01234"), "123"),
         (("abcd", "dcba"), "c"),
     )
     for (left, right), expected in expectations:
         self.assertEqual(lcs(left, right), expected)
예제 #32
0
def alternative_pal(s):
    """Return ``len(s)`` minus the length of ``lcs(s, reversed s)``."""
    mirrored = s[::-1]
    return len(s) - len(lcs(s, mirrored))
예제 #33
0
 def test_lcs(self):
     # Here lcs is expected to return a number: 3 for the first fixture
     # pair, 4 for the second.
     first_result = lcs(self.seq1, self.seq2)
     self.assertEqual(first_result, 3)
     second_result = lcs(self.seq3, self.seq4)
     self.assertEqual(second_result, 4)
예제 #34
0
def evaluate_step_pair(a_tokens, a_tags, b_tokens, b_tags):
    """Return ``lcs`` of the two token sequences after ``tensor_to_list``.

    ``a_tags`` and ``b_tags`` are accepted but not used.
    """
    left = tensor_to_list(a_tokens)
    right = tensor_to_list(b_tokens)
    return lcs(left, right)
예제 #35
0
from ground_truth import ground_truth
from translate import translate
from lcs import lcs
from numpy import median

import sys, os

# Compute, for result files 1..1000, the fraction of the ground truth that
# is recovered (len(lcs) / len(ground truth)), print it per file, and write
# the average, median and maximum to recover_rate.txt.
rate_list = []
# NOTE: this rebinds the imported ground_truth function to its result.
ground_truth = ground_truth('rsa_' + sys.argv[1] + '.sp')

for i in range(1, 1001):
    inputfile = os.path.join('../RSA/client/results/',
                             'result-' + sys.argv[2] + '-' + str(i) + '.txt')
    recovered = translate(inputfile)

    recover_rate = float(len(lcs(recovered, ground_truth))) / float(
        len(ground_truth))
    rate_list.append(recover_rate)

    print(str(i) + ': Recover rate is %.2f%%' % (recover_rate * 100))

# The divisor matches the fixed number of result files processed above.
Average = float(sum(rate_list)) / 1000
Median = median(rate_list)
Max = max(rate_list)

# Context manager guarantees the summary is flushed and the file closed.
with open("recover_rate.txt", "w+") as f:
    f.write('Average recover rate is %.2f%%\n' % (Average * 100))
    f.write('Median recover rate is %.2f%%\n' % (Median * 100))
    f.write('Maximum recover rate is %.2f%%\n' % (Max * 100))
예제 #36
0
파일: try.py 프로젝트: YLAsce/oj
from lcs import lcs
# Parenthesised single-argument form: prints the same on Python 2 and is
# valid syntax on Python 3, unlike the bare print statement.
print(lcs('aaa', 'a'))
from lcs import length_lcs
from lcs import lcs

if __name__ == "__main__":
    # Read two sequences from stdin, build the table and length with
    # length_lcs, then print the length and the reconstructed subsequence.
    first = list(input("\n Enter the first sequence:"))
    second = list(input("\n Enter the second sequence:"))
    table, lcs_len = length_lcs(first, second, len(first), len(second))
    print('\n Length of lcs is:', lcs_len)
    print('\n LCS is:', lcs(first, second, table, lcs_len))
예제 #38
0
def image_fisher_featurize_sift_lcs(im_keys, out_matrix, bidx, gmm_sift,
                                    pca_sift_mat, gmm_lcs, pca_lcs_mat):
    """Build SIFT and LCS Fisher-vector features for a batch of images.

    Every key in ``im_keys`` is read from the "pictureweb" S3 bucket. The
    flattened, float32, 0-1 scaled image goes through ``sift.sift``; the
    descriptors are square-rooted, projected with ``pca_sift_mat`` and
    encoded with ``fisher.fisher_vector_features``. The unflattened image
    goes through ``lcs.lcs``, is reshaped to rows of ``LCS_DESC_LENGTH``,
    projected with ``pca_lcs_mat`` and encoded the same way. Each per-image
    vector is L2-normalised. Both feature matrices are then signed-sqrt and
    row-wise L2 normalised, stacked horizontally and written with
    ``out_matrix.put_block(features, bidx, 0)``.

    Raises:
        Exception: when NaNs appear in the LCS descriptors (before or after
            PCA) or in the LCS Fisher vector of an image.
        AssertionError: when the remaining sanity checks fail.

    Returns:
        ``(elapsed_seconds, start_time, end_time)``.
    """

    def _raise_on_nan(values, message):
        # Explicit check: not stripped under ``python -O`` and does not
        # swallow KeyboardInterrupt/SystemExit the way a bare except does.
        if np.any(np.isnan(values)):
            raise Exception(message)

    def _signed_sqrt_l2(features):
        # Signed square root followed by row-wise L2 normalisation.
        assert not np.any(np.isnan(features))
        signs = np.sign(features)
        features = signs * np.sqrt(np.abs(features))
        feature_norms = np.linalg.norm(features, axis=1)[:, np.newaxis]
        assert not np.any(np.isnan(feature_norms))
        features /= feature_norms
        assert not np.any(np.isnan(features))
        return features

    all_sift_features = []
    all_lcs_features = []
    t = time.time()
    # One client for the whole batch instead of one per image.
    client = boto3.client('s3')
    for im_key in im_keys:
        # Download once; both decodings read from the same bytes.
        raw = client.get_object(Bucket="pictureweb", Key=im_key)["Body"].read()

        im = scipy.misc.imread(io.BytesIO(raw), flatten=True)
        im = im.astype('float32')
        im /= 255.0
        sift_descs = sift.sift(im)
        assert (np.all(sift_descs >= 0))
        sift_descs = np.sqrt(sift_descs)
        sift_descs = (sift_descs).dot(pca_sift_mat)
        sift_features = fisher.fisher_vector_features(
            sift_descs.astype('float32'), *gmm_sift)
        sift_features /= np.linalg.norm(sift_features)

        im = scipy.misc.imread(io.BytesIO(raw))
        lcs_descs = lcs.lcs(im).reshape(-1, LCS_DESC_LENGTH)
        _raise_on_nan(lcs_descs,
                      "RAISING LCS Error pre pca in {0}".format(im_key))

        lcs_descs = (lcs_descs).dot(pca_lcs_mat)
        _raise_on_nan(lcs_descs,
                      "RAISING LCS Error post pca in {0}".format(im_key))

        lcs_features = fisher.fisher_vector_features(
            lcs_descs.astype('float32'), *gmm_lcs)
        lcs_features /= np.linalg.norm(lcs_features)
        _raise_on_nan(lcs_features,
                      "RAISING LCS Fisher Vector Error in {0}".format(im_key))

        all_sift_features.append(sift_features)
        all_lcs_features.append(lcs_features)

    #sqrt normalization
    all_sift_features = _signed_sqrt_l2(np.array(all_sift_features))
    all_lcs_features = _signed_sqrt_l2(np.array(all_lcs_features))

    features = np.hstack((all_sift_features, all_lcs_features))
    out_matrix.put_block(features, bidx, 0)
    e = time.time()
    return e - t, t, e
예제 #39
0
def calculate_lcs(words):
  """Return len(lcs.lcs(...)) of the first 10 items of words[0] and words[1]."""
  head_a = words[0][:10]
  head_b = words[1][:10]
  return len(lcs.lcs(head_a, head_b))
예제 #40
0
# Given a string what is the min. no of insertions required to make the string palindrome
# Step 1 : Take reverse of string
# Step 2 : Apply LCS on string,rev of string
# Step 3 : result = len(string) - len(lcs)
from lcs import lcs

# Worked example on a fixed string.
s = "HELLO"
# NOTE(review): x is subtracted from len(s) below, so lcs is assumed to
# return a length (a number) rather than the subsequence itself -- confirm.
x = lcs(s, s[::-1])
print(len(s) - x)
예제 #41
0
def lcs(text1, text2):
    """Return ``|len(text1) - len(seq.lcs(text1, text2))| / len(text1)``."""
    common = seq.lcs(text1, text2)
    shortfall = float(len(text1) - len(common))
    return abs(shortfall / float(len(text1)))
def shortestCommonSuperSequence(s1, s2):
    """Print ``len(s1) + len(s2) - lcs(s1, s2)``; returns nothing."""
    combined_length = len(s1) + len(s2)
    print('length= ', combined_length - lcs(s1, s2))
예제 #43
0
def _matchTemplates(q, kbDict, qtList, slots, subFirst):
    """Collect answer candidates by cutting ``q`` with head/middle/tail templates.

    ``slots`` holds the three ``qtList`` keys for the head, middle and tail
    template lists.  The text before the middle template and the text before
    the tail template become the subject and predicate candidates (in that
    order when ``subFirst`` is true, swapped otherwise).  A candidate is kept
    only when the subject is a key of ``kbDict`` and the predicate is contained
    in one of that subject's knowledge-base entries.
    """
    headKey, middleKey, tailKey = slots
    matches = set()
    for head in qtList[headKey]:
        if not (head == '' or q.find(head) == 0):
            continue
        afterHead = q.replace(head, '', 1)
        for middle in qtList[middleKey]:
            middleAt = afterHead.find(middle)
            # NOTE(review): str.find returns -1 when the template is absent,
            # which also passes the ``!= 0`` test; the slice below then drops
            # the last character.  Preserved as in the original logic.
            if not (middle == '' or middleAt != 0):
                continue
            firstPart = afterHead[:middleAt]
            afterMiddle = afterHead[middleAt:].replace(middle, '', 1)
            for tail in qtList[tailKey]:
                tailAt = afterMiddle.find(tail)
                if not (tail == '' or tailAt != 0):
                    continue
                secondPart = afterMiddle[:tailAt]
                if subFirst:
                    subCandidate, preCandidate = firstPart, secondPart
                else:
                    subCandidate, preCandidate = secondPart, firstPart
                if subCandidate in kbDict:
                    for kb in kbDict[subCandidate]:
                        if preCandidate in kb:
                            matches.add(
                                answerCandidate(subCandidate, preCandidate, q))
    return matches


def _dedupeCandidates(candidates):
    """Return a new set keeping one candidate per distinct ``(pre, sub)`` pair."""
    unique = set()
    for candidate in candidates:
        alreadyKept = any(
            kept.pre == candidate.pre and kept.sub == candidate.sub
            for kept in unique)
        if not alreadyKept:
            unique.add(candidate)
    return unique


def answerQ(qRaw, lKey, kbDict, qtList, threshold=0, debug=False):
    """Pick the best-scoring answer candidates for the question ``qRaw``.

    The question is stripped and has its spaces removed, then candidates are
    gathered from four sources:

    * template matches built from ``qtList['01'..'03']`` (subject before
      predicate) and ``qtList['11'..'13']`` (predicate before subject);
    * keys of ``lKey`` whose ``lcs.lcs`` with the question is the longest;
    * predicates whose ``lcs.lcs`` with the text around the matched subject
      is the longest;
    * subject/predicate pairs whose combined match length is the longest.

    Every candidate is scored with ``calcScore(qtList)`` and the top-scoring
    ones are returned, de-duplicated on ``(pre, sub)``.

    Args:
        qRaw: the raw question string.
        lKey: iterable of subject keys; every key with a non-empty match is
            looked up in ``kbDict``.
        kbDict: maps a subject key to an iterable of entries; each entry is
            iterated for predicates and tested with ``in``.
        qtList: mapping holding the template lists ``'01'``-``'03'`` and
            ``'11'``-``'13'``; also passed to ``calcScore``.
        threshold: accepted for interface compatibility; not used.
        debug: when ``False`` only the best-answer set is returned.

    Returns:
        The set of best candidates, or, when ``debug`` is not ``False``, the
        list ``[bestAnswer, maxSubSet, maxPreSet, maxSPSet]``.
    """
    q = qRaw.strip().replace(' ', '')

    maxSubSet = set()
    maxPreLen = 0
    maxPreSet = set()
    maxSPSet = set()

    # Candidates that fit a question template exactly.
    qtMatchSet = _matchTemplates(q, kbDict, qtList, ('01', '02', '03'), True)
    qtMatchSet |= _matchTemplates(q, kbDict, qtList, ('11', '12', '13'), False)

    # Match every key against the question once, remembering the non-empty
    # results so the second pass does not have to recompute them.
    keyMatches = []
    maxSubLen = 0
    for key in lKey:
        lcsSub = lcs.lcs(q, key)
        if lcsSub == '':
            continue
        keyMatches.append((key, lcsSub))
        maxSubLen = max(maxSubLen, len(lcsSub))

    maxSPLen = maxSubLen

    for key, lcsSub in keyMatches:
        lcsSubLen = len(lcsSub)

        # Split the question into the text before and after the matched part.
        lcsSubIndex = q.index(lcsSub)
        qRemoveSub1 = q[:lcsSubIndex]
        qRemoveSub1Len = len(qRemoveSub1)
        qRemoveSub2 = q[lcsSubIndex + lcsSubLen:]
        qRemoveSub2Len = len(qRemoveSub2)

        isBestSub = maxSubLen == lcsSubLen
        foundPre = False
        for kb in kbDict[key]:
            for pre in list(kb):
                preLen = len(pre)
                lcsPre1 = ''
                lcsPre2 = ''
                if isBestSub:
                    lcsPre1 = lcs.lcs(qRemoveSub1, pre)
                    lcsPre2 = lcs.lcs(qRemoveSub2, pre)
                    if lcsPre1 != '' or lcsPre2 != '':
                        newAnswerCandidate = answerCandidate(key, pre, q)
                        foundPre = True
                        maxSubSet.add(newAnswerCandidate)

                # Longest predicate match seen so far; the length checks skip
                # LCS calls that cannot beat the current maximum.
                if preLen > maxPreLen:
                    if qRemoveSub1Len > maxPreLen:
                        lcsPre1 = lcs.lcs(qRemoveSub1, pre)
                    if qRemoveSub2Len > maxPreLen:
                        lcsPre2 = lcs.lcs(qRemoveSub2, pre)
                    maxLcsPre12 = max(len(lcsPre1), len(lcsPre2))
                    if maxLcsPre12 > maxPreLen:
                        maxPreLen = maxLcsPre12
                        maxPreSet = set()
                    if maxLcsPre12 == maxPreLen:
                        newAnswerCandidate = answerCandidate(key, pre, q)
                        maxPreSet.add(newAnswerCandidate)

                # Longest combined subject + predicate match seen so far.
                maxResidual = maxSPLen - lcsSubLen
                if preLen > maxResidual:
                    if qRemoveSub1Len > maxResidual:
                        lcsPre1 = lcs.lcs(qRemoveSub1, pre)
                    if qRemoveSub2Len > maxResidual:
                        lcsPre2 = lcs.lcs(qRemoveSub2, pre)
                    maxResidual12 = max(len(lcsPre1), len(lcsPre2))
                    if maxResidual12 > maxResidual:
                        maxSPLen = maxResidual12 + lcsSubLen
                        maxResidual = maxSPLen - lcsSubLen
                        maxSPSet = set()
                    if maxResidual12 == maxResidual:
                        newAnswerCandidate = answerCandidate(key, pre, q)
                        maxSPSet.add(newAnswerCandidate)
        if isBestSub and not foundPre:
            # Best subject match with no matching predicate: keep it as a
            # subject-only candidate carrying the subject's KB entries.
            newAnswerCandidate = answerCandidate(key, '', q, 2, 0, kbDict[key])
            maxSubSet.add(newAnswerCandidate)

    maxSubSet = _dedupeCandidates(maxSubSet)
    maxPreSet = _dedupeCandidates(maxPreSet)
    maxSPSet = _dedupeCandidates(maxSPSet)

    # Keep only the candidates sharing the highest score across all sources.
    maxScore = 0
    bestAnswer = set()
    for candidateSet in (maxSubSet, maxPreSet, maxSPSet, qtMatchSet):
        for aCandidate in candidateSet:
            scoreTmp = aCandidate.calcScore(qtList)
            if scoreTmp > maxScore:
                maxScore = scoreTmp
                bestAnswer = set()
            if scoreTmp == maxScore:
                bestAnswer.add(aCandidate)

    bestAnswer = _dedupeCandidates(bestAnswer)

    # Kept as an equality test so non-bool falsy values such as None still
    # select the debug return, exactly as before.
    if debug == False:
        return bestAnswer
    return [bestAnswer, maxSubSet, maxPreSet, maxSPSet]
예제 #44
0
파일: google.py 프로젝트: Answeror/pypaper
 def _select_best(self, articles):
     """Return the article with the smallest ``lcs`` value against ``self.gn``.

     Each article's ``'title'`` is passed through ``_em`` and ``gn`` before
     being compared with ``self.gn``.
     """
     from gn import gn
     from lcs import lcs

     def title_score(art):
         normalized_title = gn(_em(art['title']))
         return lcs(self.gn, normalized_title)

     return min(articles, key=title_score)
                        with tag('div',klass = "bg-danger"):
                            line('i','',klass = 'fa fa-remove pull-right')
                            line('h4',i[3:])
                    if i[0:3] == 'LCS':
                        with tag('div',klass = "bg-info"):
                            line('i','',klass = 'fa fa-star pull-right')
                            line('h4',i[3:])



    return indent(doc.getvalue())

def saveMarkedUpContentToFile(content):
    """Write ``content`` to ``output.html`` in the working directory.

    The file is opened in ``'w'`` mode, so any existing file is overwritten.
    """
    with open('output.html', 'w') as outputFile:
        outputFile.write(content)




if __name__ == "__main__":
    # Script entry point: read both inputs, compute the LCS operations for
    # their first entries, and write the marked-up comparison as HTML.
    input1 = get_file_content('input1.txt')
    input2 = get_file_content('input2.txt')

    # The operations are computed on the raw first entries, before they are
    # joined into plain strings for rendering.
    operations = lcs.lcs(input1[0], input2[0])
    input1 = ''.join(input1[0])
    input2 = ''.join(input2[0])
    # Call form of print: the statement form is a SyntaxError on Python 3,
    # while this prints the same single value on Python 2 and 3.
    print(operations)
    content = generate_html_code(input1, input2, operations)
    saveMarkedUpContentToFile(content)

예제 #46
0
def compare_value(value1, value2):
  """Return a dissimilarity score for two values.

  * both ``None``                    -> ``0.0``
  * exactly one ``None``             -> ``1.0``
  * both empty (longer length is 0)  -> ``0.0``
  * otherwise ``1.0 - len(lcs.lcs(lcs.path(value1, value2))) / longer length``

  The empty/empty case used to raise ``ZeroDivisionError``; two empty values
  are treated as identical, in line with the both-``None`` case.
  """
  if value1 is None and value2 is None:
    return 0.0
  if value1 is None or value2 is None:
    return 1.0
  longest = max(len(value1), len(value2))
  if longest == 0:
    return 0.0
  common_length = len(lcs.lcs(lcs.path(value1, value2)))
  return 1.0 - (float(common_length) / longest)