Esempio n. 1
0
 def getSuccessors(self, statement, i, goalCoHash = None):
     """Apply the absorption law at node i: p & (p v q) -> p, and its
     dual p v (p & q) -> p.

     `statement` is a proposition tree addressed like a binary heap
     (children of node i are i*2+1 and i*2+2, as the index arithmetic
     below shows).  Returns a single successor Statement, a list of
     them when both orientations fire, or None when neither does.
     `goalCoHash` is accepted for interface uniformity but unused here.
     """
     if statement.type(i) == "conjunction" or statement.type(i) == "disjunction":
         thisType = statement.type(i)
         if thisType == "conjunction": otherType = "disjunction"
         else: otherType = "conjunction"
         successors = []
         if statement.type(i*2+2)==otherType: # ie p & (q v r); thisType=="conjunction", otherType = "disjunction"
             p = statement.childTree(i*2+1)
             q = statement.childTree(i*4+5)
             r = statement.childTree(i*4+6)
             if p==q or p==r:
                 # p appears inside the other-typed child, so p absorbs
                 # the whole subexpression: graft p in at node i.
                 successor = statement.graft(i,p)
                 successor.action = self.name
                 successor.cost = self.cost + distance(str(statement), str(successor))
                 successors.append(successor)
                 
         if statement.type(i*2+1)==otherType: # ie (q v r) & p; thisType=="conjunction", otherType = "disjunction"
             p = statement.childTree(i*2+2)
             q = statement.childTree(i*4+3)
             r = statement.childTree(i*4+4)
             if p==q or p==r:
                 successor = statement.graft(i,p)
                 successor.action = self.name
                 successor.cost = self.cost + distance(str(statement), str(successor))
                 successors.append(successor)
         if len(successors) == 1:
             return successors[0]
         elif len(successors) > 1:
             return successors
         else: return None
Esempio n. 2
0
 def getSuccessors(self, statement, i, goalCoHash = None):
     """Apply annihilator laws at node i: p & False -> False and
     p v True -> True (either child may hold the constant).

     Returns a successor Statement with action/cost set, or None when
     the rule does not apply.  `goalCoHash` is unused here.
     """
     if statement.type(i) == "conjunction":
         if statement.type(i*2+1)=="false_constant":
             # Copy the whole tree (childTree(0) is the root), then
             # collapse node i down to the bare constant.
             successor = statement.childTree(0)
             successor.prune(i)
             successor.insertProp(i, "false_constant")
             successor.action = self.name
             successor.cost = self.cost + distance(str(statement), str(successor))
             return successor
         elif statement.type(i*2+2)=="false_constant":
             successor = statement.childTree(0)
             successor.prune(i)
             successor.insertProp(i, "false_constant")
             successor.action = self.name
             successor.cost = self.cost + distance(str(statement), str(successor))
             return successor
     elif statement.type(i) == "disjunction":
         if statement.type(i*2+1)=="true_constant":
             successor = statement.childTree(0)
             successor.prune(i)
             successor.insertProp(i, "true_constant")
             successor.action = self.name
             successor.cost = self.cost + distance(str(statement), str(successor))
             return successor
         elif statement.type(i*2+2)=="true_constant":
             successor = statement.childTree(0)
             successor.prune(i)
             successor.insertProp(i, "true_constant")
             successor.action = self.name
             successor.cost = self.cost + distance(str(statement), str(successor))
             return successor
def get_min_score(tokens, img_name):
  """Return the smallest Levenshtein distance between img_name and any
  token in tokens.

  tokens   -- non-empty iterable of strings to compare against
  img_name -- the target string
  Raises ValueError on an empty tokens sequence (the original raised
  IndexError in that case).
  """
  # min() over a generator replaces the manual first-element seed +
  # index loop of the original.
  return min(distance(token, img_name) for token in tokens)
Esempio n. 4
0
def search(start,goal,rules,verbose = False):
    """Best-first search from statement `start` to `goal`, expanding
    successors with `rules`.  The heuristic is the Levenshtein distance
    between a node's string form and the goal's string form.

    Returns a Derivation on success, False when the frontier empties
    (the statements are not logically equivalent), and raises
    TimeOutException after 2000 expansions.  (Python 2 syntax.)
    """
    goalStr = str(goal)
#    l = len(str(start))+len(str(goal))
    nodesExpanded = 0
    shortcuts = 0  # re-pushes of a frontier node reached via a cheaper path
    node = Node(start, None)
    node.cost = distance(str(node.state), goalStr)
    frontier = PriorityQueue()
    frontier.push(node,node.cost)
    explored = set()
    while not frontier.isEmpty():
        if nodesExpanded > 2000:
            raise TimeOutException()
        node = frontier.pop()
        if nodesExpanded%10==0:
            # progress trace every 10 expansions
            print nodesExpanded
        nodesExpanded += 1
        if node.state == goal:
#            print "expanded: ", nodesExpanded, " shortcuts: ", shortcuts
            print "expanded: ", nodesExpanded, " shortcuts: ", shortcuts
            return Derivation(start,goal,node.traceback(),rules)
        explored.add(node.state)
        for child in node.successors(rules,goal):
            h = distance(str(child.state), goalStr)
            # getCheapestCost(child) == -1 apparently means the child is
            # not yet on the frontier -- TODO confirm against PriorityQueue.
            if child.state not in explored and frontier.getCheapestCost(child) == -1:
                frontier.push(child, child.cost + h)
                if verbose: 
                    print child.cost, child.state, h
            elif frontier.getCheapestCost(child) > child.cost:
                shortcuts += 1
                frontier.push(child, child.cost + h)
    print "NOT LOGICALLY EQUIVALENT"
    return False
Esempio n. 5
0
def mate_pop(top, population):
    """Produce the next generation: cross every member of `population`
    with `top`, mutate the offspring, and score by Levenshtein distance
    to TARGET.

    Entries are (distance, string) tuples so list.sort() orders the
    result by fitness (smallest distance first).
    """
    new_pop = []
    new_pop.append((distance(top, TARGET), top))  # keep the elite member
    for pair in population:
        string = mutate(crossover(pair[1], top))
        d = distance(string, TARGET)
        new_pop.append((d, string))
    # NOTE(review): pop() runs BEFORE sort(), so it discards the last
    # offspring regardless of fitness.  It keeps the population size
    # constant, but dropping the worst after sorting was probably the
    # intent -- confirm.
    new_pop.pop()
    new_pop.sort()
    return new_pop
Esempio n. 6
0
def return_operon_string_distance(operon_string, gene_string):
    """Edit distance between an operon string and a gene string.

    Both orientations of the gene string are tried and the better one
    is used; the unavoidable length difference between the two strings
    is subtracted so pure overhang is not counted.
    """
    slack = len(operon_string) - len(gene_string)
    forward = distance(operon_string, gene_string) - slack
    backward = distance(operon_string, gene_string[::-1]) - slack
    return min(forward, backward)
Esempio n. 7
0
 def getSuccessors(self, statement, i, goalCoHash = None):
     #maybe make commutativity discount its cost when it's close to the goalCoHash, using the older equivalence hashing method that ignores order for commuativ operators
     
     """Commutativity: swap the two children of the and/or node at i.

     The successor's cost uses the plain edit distance when the
     statement's commutative hash equals goalCoHash, and ten times that
     otherwise, discouraging swaps that are not progress toward the
     goal.  Returns a successor Statement or None.
     """
     if statement.type(i) == "conjunction" or statement.type(i) == "disjunction":
         left = statement[i*2+1]
         right = statement[i*2+2]
         successor = statement.graft(i*2+1,right)  # right subtree into the left slot
         successor.graftInPlace(i*2+2,left)        # left subtree into the right slot
         successor.action = self.name
         if statement.cohash()==goalCoHash: successor.cost = self.cost + distance(str(statement), str(successor))
         else: successor.cost = self.cost + 10*distance(str(statement), str(successor))
         return successor
Esempio n. 8
0
 def match(self, equipe):
     """Heuristically decide whether `equipe` is the team already
     registered for this participation.

     Two members count as the same person when both hold a licence with
     the same number, or when both last and first names are within
     Levenshtein distance 3 (case-insensitive).  Returns True when the
     match count reaches half the incoming team's size.
     """
     if self.categorie and not self.categorie.valide(equipe):
         return False
     equipiers_challenge = Equipier.objects.filter(equipe__challenges__participation=self)
     c = 0
     equipiers = equipe.equipier_set.all()
     # NOTE: no break below -- one member matching several registered
     # members increments c more than once.
     for e in equipiers:
         for e2 in equipiers_challenge:
             if e.justificatif == 'licence' and e2.justificatif == 'licence' and e.num_licence == e2.num_licence:
                 c += 1
             elif distance(e.nom.lower(), e2.nom.lower()) < 3 and distance(e.prenom.lower(), e2.prenom.lower()) < 3:
                 c += 1
     return c >= len(equipiers) / 2
Esempio n. 9
0
 def getSuccessors(self, statement, i, goalCoHash = None):
     """De Morgan's laws applied at node i.

     * i is a negation of an and/or:   ~(p OP q) -> (~p DUAL ~q).
     * i is an and/or of two negations: (~p OP ~q) -> ~(p DUAL q).
     * with self.dangerous set, any and/or at i is rewritten the other
       way, (p OP q) -> ~(~p DUAL ~q), which can grow the tree.

     Returns a successor Statement with action/cost set, or None.
     `goalCoHash` is unused here.
     """
     if statement.type(i) == "negation":
         if statement.type(i*2+1) == "conjunction" or statement.type(i*2+1) == "disjunction":
             thisType = statement.type(i*2+1)
             if thisType == "conjunction": otherType = "disjunction"
             else: otherType = "conjunction"
             
             # Negated copies of the two operands (grandchildren of i).
             np = statement.negatedChildTree(i*4+3)
             nq = statement.negatedChildTree(i*4+4)
             
             # Build "np otherType nq" as a fresh tree, graft it at i.
             ns = Statement(dict(),statement.propMap)
             ns.insertProp(0, otherType)
             ns.graftInPlace(1,np)
             ns.graftInPlace(2,nq)
             successor = statement.graft(i,ns)
             successor.action = self.name
             successor.cost = self.cost + distance(str(statement), str(successor))
             return successor
     elif statement.type(i) == "conjunction" or statement.type(i) == "disjunction":
         thisType = statement.type(i)
         if thisType == "conjunction": otherType = "disjunction"
         else: otherType = "conjunction"
         
         if self.dangerous:
             np = statement.negatedChildTree(i*2+1)
             nq = statement.negatedChildTree(i*2+2)
             
             # Build "~(np otherType nq)" and graft it at i.
             ns = Statement(dict(),statement.propMap)
             ns.insertProp(0, "negation")
             ns.insertProp(1, otherType)
             ns.graftInPlace(3,np)
             ns.graftInPlace(4,nq)
             successor = statement.graft(i,ns)
             successor.action = self.name
             successor.cost = self.cost + distance(str(statement), str(successor))
             return successor
         
         else:
             # Safe direction: only fire when both operands are already
             # negations, so the rewrite strips them instead of adding.
             if statement.type(i*2+1) == "negation" and statement.type(i*2+2) == "negation":
                 p = statement.childTree(i*4+3)
                 q = statement.childTree(i*4+5)
                 ns = Statement(dict(),statement.propMap)
                 ns.insertProp(0, "negation")
                 ns.insertProp(1, otherType)
                 ns.graftInPlace(3,p)
                 ns.graftInPlace(4,q)
                 successor = statement.graft(i,ns)
                 successor.action = self.name
                 successor.cost = self.cost + distance(str(statement), str(successor))
                 return successor
Esempio n. 10
0
 def getSuccessors(self, statement, i, goalCoHash = None):
     """Distribution at node i: p & (q v r) -> (p & q) v (p & r), and
     the dual, for the other-typed child on either side.

     Returns a single successor Statement, a list when both sides fire,
     or None.  `goalCoHash` is unused here.
     """
     if statement.type(i) == "conjunction" or statement.type(i) == "disjunction":
         thisType = statement.type(i)
         if thisType == "conjunction": otherType = "disjunction"
         else: otherType = "conjunction"
         successors = []
         if statement.type(i*2+2)==otherType: # ie p & (q v r); thisType=="conjunction", otherType = "disjunction"
             # p is copied twice because it appears in both new branches.
             p = statement.childTree(i*2+1)
             p2 = statement.childTree(i*2+1)
             q = statement.childTree(i*4+5)
             r = statement.childTree(i*4+6)
             
             successor = statement.childTree(0)
             successor.prune(i)
             successor.insertProp(i,otherType)       # _ v _
             successor.insertProp(i*2+1,thisType)    # (_ & _) v _
             successor.insertProp(i*2+2,thisType)    # (_ & _) v (_ & _)
             successor.graftInPlace(i*4+3,p)         # (p & _) v (_ & _)
             successor.graftInPlace(i*4+4,q)         # (p & q) v (_ & _)
             successor.graftInPlace(i*4+5,p2)        # (p & q) v (p2 & _)
             successor.graftInPlace(i*4+6,r)         # (p & q) v (p2 & r)
             successor.action = self.name
             successor.cost = self.cost + distance(str(statement), str(successor))
             successors.append(successor)
             
             
         if statement.type(i*2+1)==otherType:
             # Mirror case: (q v r) & p.
             p = statement.childTree(i*2+2)
             p2 = statement.childTree(i*2+2)
             q = statement.childTree(i*4+3)
             r = statement.childTree(i*4+4)
             
             successor = statement.childTree(0)
             successor.prune(i)
             successor.insertProp(i,otherType)       
             successor.insertProp(i*2+1,thisType)    
             successor.insertProp(i*2+2,thisType)    
             successor.graftInPlace(i*4+3,p)    
             successor.graftInPlace(i*4+4,q)     
             successor.graftInPlace(i*4+5,p2)    
             successor.graftInPlace(i*4+6,r)    
             successor.action = self.name
             successor.cost = self.cost + distance(str(statement), str(successor))
             successors.append(successor)
         if len(successors) == 1:
             return successors[0]
         elif len(successors) > 1:
             return successors
         else: return None
Esempio n. 11
0
def check_distances(combined_hcv, report_file=None):
    """Report samples whose genotype label disagrees with their closest
    reference sequence.

    combined_hcv -- seekable FASTA file object, read twice: once for
                    'Ref.' entries, once for 'Sample.' entries.  The
                    genotype is the last '-'-separated field of a header.
    report_file  -- destination for report lines (None = stdout).

    For each sample, the Levenshtein distance to every reference is
    computed (gaps stripped); a line is printed when the overall
    closest reference is not one of the reported genotype.
    """
    references = [(header, sequence.replace('-', ''))
                  for header, sequence in iterate_fasta(combined_hcv)
                  if header.startswith('Ref.')]

    combined_hcv.seek(0)
    samples = ((header, sequence.replace('-', ''))
               for header, sequence in iterate_fasta(combined_hcv)
               if header.startswith('Sample.'))
    for header, sequence in samples:
        reported_genotype = header.split('-')[-1]
        reported_ref = best_ref = min_distance = reported_distance = None
        reported_size = best_size = 0
        for ref_header, ref_seq in references:
            ref_genotype = ref_header.split('-')[-1]
            d = distance(sequence, ref_seq)
            # Track the best reference of the REPORTED genotype ...
            if (ref_genotype == reported_genotype and
                    (reported_distance is None or d < reported_distance)):
                reported_distance = d
                reported_ref = ref_header
                reported_size = len(ref_seq)
            # ... and the best reference overall.
            if min_distance is None or d < min_distance:
                min_distance = d
                best_ref = ref_header
                best_size = len(ref_seq)
        if min_distance != reported_distance:
            best_genotype = best_ref.split('-')[-1]
            print(f'Reported {reported_genotype}, but {best_genotype} is '
                  f'closer: {header}(0/{len(sequence)}), '
                  f'{reported_ref}({reported_distance}/{reported_size}), '
                  f'{best_ref}({min_distance}/{best_size}).',
                  file=report_file)
Esempio n. 12
0
def extract_option(text):
    """
    Return the Option referenced by ``text`` (in various fuzzy ways), or
    raise a ValueError if none could be found. This function tries hard
    to find an Option, as sort-of documented by the ``tests`` module.

    Raises TypeError for non-string input.  (Python 2: relies on
    ``basestring``/``unicode``.)
    """

    if not isinstance(text, basestring):
        raise TypeError("Not a basestring: %r" % text)

    t = unicode(text).lower()
    matches = []

    for option in Option.objects.all():
        l = option.letter.lower()
        c = option.caption.lower()

        # return early if this is an exact match.
        if (t == l) or (t == c):
            return option

        # otherwise, compile a list of distances.
        d = distance(t, c)
        # (The None check is defensive; an edit distance is an int.)
        if (d is not None) and (d <= settings.MAX_MATCH_DISTANCE):
            matches.append((option, d))

    # return the closest option.
    if len(matches):
        m = sorted(matches, key=lambda x: x[1])
        return m[0][0]

    raise ValueError("No Option could be found in: %s" % text)
def find_similar_names(search_name, base, default_distance):
	"""Find people in `base` whose name parts are close to `search_name`.

	search_name -- sequence of name parts; a single-letter part is
	               treated as an initial (compared to the first letter
	               of the corresponding part only)
	base        -- dict mpid -> record; index 1 is the name (a sequence
	               of parts), index 5 the district
	default_distance -- per-part Levenshtein threshold (exclusive)

	Name parts are compared pairwise; zip() truncates to the shorter of
	the two sequences.  An exact full-length match returns immediately
	as a single-entry list; otherwise every record all of whose compared
	parts are within the threshold is collected.
	Returns a list of [mpid, name, district] entries.
	"""
	similar_names = list()
	for mpid in base.keys():
		# mpid, name, link, party, ticket, district,\
		# rid, rdate, urid, urdate, urreason,\
		# bio, profile, party12, ticket12, link12,\
		# district12, did12, dlink12, loh, lohcom,\
		# corrupt, autobio, biolink, decl, decllink

		name = base[mpid][1]
		district = base[mpid][5]

		dist = list()
		for pair in zip(search_name, name):
			search_name_el, name_el = pair
			if len(search_name_el) == 1:
				# Initial: compare against the first letter only.
				name_el = name_el[0:1]
			current_dist = distance(search_name_el, name_el)
			dist.append(current_dist)

		if len(search_name) == len(name) and sum(dist) == 0:
			return [[mpid, name, district]]

		# NOTE(review): when zip() produced no pairs, all() is True and
		# the record is accepted unconditionally -- confirm intended.
		if all(d < default_distance for d in dist):
			similar_names.append([mpid, name, district])
	return similar_names
Esempio n. 14
0
    def anomalies(self, request):
        """Admin view listing probable duplicate competitors for the
        course selected by the 'course_uid' cookie.

        Two entrants are flagged as duplicates when their full names
        ('nom prenom', lower-cased) are within Levenshtein distance 3.
        Entrants whose numero exceeds their team size are skipped.
        Renders admin/anomalies.html with the duplicate groups.
        """
        request.current_app = self.name
        uid = request.COOKIES['course_uid']
        course = Course.objects.get(uid=uid, accreditations__user=request.user)
        equipiers = list(Equipier.objects.filter(equipe__course=course).select_related('equipe__categorie'))

        doublons = []
        for i, e in enumerate(equipiers):
            if e.numero > e.equipe.nombre:
                continue
            dbl = []
            # Only compare forward (j > i) so each pair is seen once.
            for j in range(i + 1, len(equipiers)):
                e2 = equipiers[j]
                if e2.numero > e2.equipe.nombre:
                    continue
                if distance((e.nom + ' ' + e.prenom).lower(), (e2.nom + ' ' + e2.prenom).lower()) < 3:
                    dbl.append(e2)
            if dbl:
                dbl.insert(0, e)
                doublons.append(dbl)
        print(doublons)

        return TemplateResponse(request, 'admin/anomalies.html', dict(self.each_context(request),
            doublons=doublons,
            course=course,
        ))
Esempio n. 15
0
def fix_garbage_sugar(pairs):
    """Relabel the first pair whose name contains a near-match of the
    canonical sugars label.

    A 5-character window slides over each name; a window within
    Levenshtein distance 2 of Keywords.label.sugars marks that pair
    (identified by pair[2]) for relabeling.

    pairs -- sequence of (name, value, id) tuples.
    Returns a new list with the matched pair's name replaced by
    Keywords.label.sugars; other pairs are copied unchanged.

    Fix: the original reused the window index `i` as the matched id, so
    when no window matched, `i` kept a stale range value and the second
    loop could relabel an unrelated pair whose id happened to equal it.
    """
    target_id = None  # id (pair[2]) of the pair to relabel, if any

    for pair in pairs:
        name = pair[0]
        # Slide a 5-char window over the name; always try at least one.
        slide = max(1, (len(name) - 6) + 1)
        for start in range(slide):
            if distance(Keywords.label.sugars, name[start:(5 + start)]) <= 2:
                target_id = pair[2]
                break
        if target_id is not None:
            break

    betterPairs = []
    for pair in pairs:
        label = Keywords.label.sugars if pair[2] == target_id else pair[0]
        betterPairs.append((label, pair[1], pair[2]))

    return betterPairs
Esempio n. 16
0
def generate_aliases(table, ref_list, match_list, dist_limit=3):
    """Populate `table` with alias candidates by fingerprint distance.

    Every ref is inserted as its own canonical entry (if absent); every
    match whose fingerprint is within dist_limit of a ref's fingerprint
    is inserted as a candidate alias of that ref.  Progress is printed
    every 100000 comparisons.  Ends by calling write_aliases(table).
    (Python 2 syntax.)
    """
    comps = 0.0
    total_comps = float(max(1, len(ref_list) * len(match_list)))
    for ref in ref_list:
        if not table.find_one(name=ref["name"]):
            table.insert({"name": ref["name"], "fp": ref["fp"], "canonical": ref["name"]})

        for match in match_list:
            dist = distance(match["fp"], ref["fp"])
            comps += 1.0
            if comps and comps % 100000 == 0:
                pct_comps = int((comps / total_comps) * 100)
                print "%s matching: %s%%" % (table.table.name, pct_comps)
            if dist < dist_limit:
                if not table.find_one(name=match["name"]):
                    table.insert(
                        {
                            "name": match["name"],
                            "fp": match["fp"],
                            "candidate": ref["name"],
                            "distance": dist,
                            "canonical": match["name"],
                        }
                    )
                # print 'Match? %r -> %r' % (ref['name'], match['name'])

    write_aliases(table)
Esempio n. 17
0
    def search_by_similar_name(self, genus, species):
        """
        Search for Species with a similarly spelled name as the given name.

        This method can help correct spelling mistakes in species names.
        Candidates share the first and last two letters of both genus
        and species; the one with the smallest Levenshtein distance to
        "genus species" wins when that distance is under 3.  The result
        (or a newly created Species) is cached in species_by_fullname.
        (Python 2: relies on unicode().)
        """
        matches = self.filter(
            genus__startswith=genus[:2],
            genus__endswith=genus[-2:],
            species__startswith=species[:2],
            species__endswith=species[-2:]
        )
        complete_name = u" ".join((genus, species))
        min_match = 10
        min_match_species = None
        for match in matches:
            d = distance(complete_name, unicode(match))
            if d < min_match:
                min_match = d
                min_match_species = match

        # Fix: test the MINIMUM distance, not `d` (which held only the
        # last candidate's distance and was unbound when there were no
        # candidates at all).
        if min_match_species and min_match < 3:
            species_by_fullname[complete_name] = min_match_species
        else:
            # Fix: create from the arguments; the original referenced an
            # undefined name `i` here (i[0]/i[1]).
            species = Species.objects.create(genus=genus, species=species)
            species_by_fullname[complete_name] = species
Esempio n. 18
0
    def clustering(self, elems):
        """
        Clusterize the input elements.

        Input: list of words (e.g. list of URLs). It MUST be sorted!

        Builds a dict mapping cluster IDs (int) to lists of elements:
        each element joins the current cluster when its distance to the
        previous element is within DISTANCE, otherwise a new cluster
        starts.  The dict is stored under self.clusters['clusters'],
        along with the largest cluster and the cluster count.
        """
        clusters = {}
        cid = 0
        previous = None

        for elem in elems:
            # After the first element, compare against the one before it
            # (which is always the tail of the current cluster).
            if previous is not None and distance(previous, elem) > DISTANCE:
                cid += 1
            clusters.setdefault(cid, []).append(elem)
            previous = elem

        self.clusters['clusters'] = clusters
        self.clusters['clusters']['largest'] = self.get_largest_cluster()
        self.clusters['clusters']['number_of_clusters'] = cid + 1
Esempio n. 19
0
def getTemplateNoDiac(word):
	"""Return the possible template(s) (wazn) for an Arabic word.

	Diacritics are stripped from the word first.  Only templates of the
	same (stripped) length whose pattern regex-matches the word are
	considered; all templates tied at the minimal Levenshtein distance
	are returned joined with '+'.  Returns u"" when nothing matches.
	(Python 2: relies on the built-in unicode().)
	"""

	template = u""
	minDistance = 1000  # sentinel larger than any realistic distance
	word_u = deleteDiacritics(word)
	word_u = unicode(word_u)
	for wazn in wazns:
		wazn_u = deleteDiacritics(wazn)
		wazn_u = deleteRoot(wazn_u)
		wazn_u = unicode(wazn_u)
		if len(wazn_u) != len(word_u):
			continue

		#print "distance(" + word_u + "," + wazn_u + ")"
		distanceI = distance(word_u, wazn_u)
		if distanceI < minDistance:
			# Strictly better: restart the candidate list with this wazn.
			if re.match(wazn_u, word_u):
				minDistance = distanceI
				template = wazn
			continue
		if distanceI == minDistance:
			# Tie with the current best: append.
			if re.match(wazn_u, word_u):
				template = template + '+' + wazn
	return template
def optional_check():
    """Optionally check for sentences that we failed to match.

    Prints the unmatched sentences, then compares sentence hashes
    (Levenshtein distance < 15) against every description in the
    'train' split to surface near-misses.  (Python 2 syntax.)
    """
    print 'Not found:'
    not_found = {s for h,s in negation_sentence_hashes.items() if not h in found}
    print '\n'.join(not_found)

    from Levenshtein import distance

    print ''
    print 'Computing Levenshtein distances to find candidates we could have missed.'

    found_missing = False
    for split in ['train']:
        for key in old_file[split]:
            for description in old_file[split][key]['descriptions']:
                for sentence in not_found:
                    h1 = sentence_hash(sentence)
                    h2 = sentence_hash(description)
                    if distance(h1, h2) < 15:
                        found_missing = True
                        print 'POSSIBLE MATCH:'
                        print sentence
                        print description
                        print '----------------------------------'

    if not found_missing:
        print 'Matched all we could possibly match.'
Esempio n. 21
0
def main():
    """Entry point: runs try2() and returns.

    NOTE(review): everything after the early return below is
    unreachable legacy genetic-algorithm code, kept (with its
    commented-out variants) apparently for reference.  (Python 2.)
    """
    try2()
    return
    population = []
    population = givemepop(population)
    print population
    for i in range(0, CYCLES):

#    for i in range(0, POP_SIZE):
#        s = givemestring(len(TARGET))
#        d = distance(s, TARGET)
#        population.append((d, s))
#    population.sort()

        mated = crossover(population[0][1], population[1][1])
        d = distance(mated, TARGET)
        new_pop = []
        new_pop.append((d, mated))
        new_pop.append(population[0])
        new_pop.append(population[1])
#    for i in range(0, POP_SIZE - len(new_pop)):
#        s = givemestring(len(TARGET))
#        d = distance(s, TARGET)
#        new_pop.append((d, s))
#    new_pop.sort()
        new_pop = givemepop(new_pop)
        print new_pop
        population = new_pop
Esempio n. 22
0
def admin_season(selected_season):
    """Admin page for one season (Python 2, Flask-style view).

    POSTing action=recalculate_points recalculates/stores points and
    returns an empty JSON response.  Otherwise renders the admin page,
    listing every pair of driver names within Levenshtein distance 4
    as probable misspellings, sorted by distance.  404s on an unknown
    season.
    """
    # Get and validate season
    seasons = config.get_all_seasons()
    if not selected_season in seasons:
        abort(404, "Season data not found")
    season_data = config.get_season_data(selected_season)

    # Do we need to recalculate points?
    if 'action' in request.form and request.form['action'] == 'recalculate_points':
        season_data.calc_and_store_points()
        return jsonify()

    # Otherwise, just display the season page. Find similar drivers in real-time, before we do this
    results_table = season_data.get_results_for_class()
    d = sorted([row["driver"] for row in results_table.table])

    # O(n^2) pairwise comparison of driver names.
    similar_drivers = []
    for i in xrange(len(d)):
        for j in xrange(i + 1, len(d)):
            dist = distance(d[i], d[j])
            if dist <= 4:
                similar_drivers.append({"name1": d[i], "name2": d[j], "distance": dist})
    similar_drivers = sorted(similar_drivers, key=itemgetter("distance"))

    driver_name_corrections = config.get_driver_name_corrections(selected_season)

    return render_template("admin.html", seasons=seasons, selected_season=selected_season, season_data=season_data,
                           similar_drivers=similar_drivers, driver_name_corrections=driver_name_corrections)
Esempio n. 23
0
    def get_departures_by_station(self, station):
        """ Get list of Departures for one station

        Fetches the station search page, picks the search result whose
        name has the smallest Levenshtein distance to the query, then
        parses that station's departure board.  (Python 2 syntax.)
        """

        # TODO 1. Error handling
        # TODO 2. more error handling
        # TODO 3. ultimative error handling

        station = station.encode('UTF-8')
        html = urlopen(defaults.departures_by_station % quote_plus(station)).read()

        li = BeautifulSoup(html).ul.findAll('li')

        # li[0].a truthy => the page is a result list, not a board.
        if li[0].a:
            # calculate levenshtein distance of results
            st = map(lambda x: (distance(station, x.a.text.encode('UTF-8')), x.a.text.encode('UTF-8'), x.a['href']), li)
            # take result with lowest levenshtein distance
            s = min(st)
            lnk = s[2]
            
            if len(st) > 1:
                print "Multiple results found, using best match:", s[1]
            
            html = urlopen(defaults.qando + lnk).read()

        dep = self.parse_departures_by_station(html)

        return dep
Esempio n. 24
0
def calc_similarity(word1, word2):
    """Return a 0-100 similarity score for two strings.

    The Levenshtein distance is scaled by the longer string's length:
    identical strings score 100, completely different ones approach 0.

    Fix: two empty strings previously raised ZeroDivisionError; they
    are identical, so the score is defined as 100.
    """
    longest = max(len(word1), len(word2))
    if longest == 0:
        return 100
    return 100 - (100 / longest) * distance(word1, word2)
Esempio n. 25
0
File: 01-basic.py Progetto: lhl/misc
  def unshred(self, output):
    """Reassemble the shredded image strips and save to `output`.

    For every strip, find the strip whose left edge best matches its
    right edge (smallest distance).  The strip whose best match is the
    WORST overall is taken as the right-most strip; the order is then
    rebuilt right-to-left by repeatedly prepending the strip whose best
    right-neighbour is the current left-most one.  (Python 2 syntax.)
    """
    distances = {}   # strip key -> distance to its best right-neighbour
    totheright = {}  # strip key -> key of its best right-neighbour

    for key in self.strips:
      min_k = None
      min_d = None
      for key2 in self.strips:
        if key != key2:
          d = distance(self.strips[key]['right'], self.strips[key2]['left'])
          if min_k == None:
            min_k = key2
            min_d = d
          else:
            if d < min_d:
              min_k = key2
              min_d = d
      print '... strip %d closest match is %d (%d)' % (key, min_k, min_d) 
      distances[key] = min_d
      totheright[key] = min_k

    right_most = max(distances, key=distances.get)
    print 'We think that strip %d is the is the right-most strip!' % right_most

    del(totheright[right_most])
    pprint(totheright)
    self.ordered = [right_most]
    # Repeatedly prepend the strip that points at the current head.
    # NOTE(review): loops forever if the neighbour chain is broken.
    while totheright:
      for key in totheright.keys():
        if totheright[key] == self.ordered[0]:
          self.ordered.insert(0, key)
          del(totheright[key])
    print 'Here\'s our order:', self.ordered

    self._save(output)
def test_convert():
    """There are 4 amino acid changes, but because codons are chosen
    randomly, experimental tests show the nucleotide edit distance
    should land anywhere between 8 and 12 changes, inclusive."""
    d = distance(str(np.src_nt.seq), str(np.des_nt.seq))
    assert 8 <= d <= 12
Esempio n. 27
0
def matching(a, b):
    """True when a and b look like the same entity: one contains the
    other (case-insensitive), their Jaccard similarity is at least 0.3,
    or their Levenshtein distance is under 3."""
    a_low, b_low = a.lower(), b.lower()
    if a_low in b_low or b_low in a_low:  # substring either way
        return True
    return get_jaccard(a, b) >= .3 or distance(a, b) < 3
Esempio n. 28
0
 def getSuccessors(self, statement, i, goalCoHash = None):
     """Idempotence at node i: p & p -> p and p v p -> p.

     Returns the collapsed successor Statement with action/cost set, or
     None when node i is not an and/or node with identical children.
     `goalCoHash` is unused here."""
     if statement.type(i) not in ("conjunction", "disjunction"):
         return None
     operand = statement[i * 2 + 1]
     if operand != statement[i * 2 + 2]:
         return None
     successor = statement.graft(i, operand)
     successor.action = self.name
     successor.cost = self.cost + distance(str(statement), str(successor))
     return successor
Esempio n. 29
0
def givemepop(initialpop):
    """Top `initialpop` up to POP_SIZE random candidates.

    Each new candidate is a random string scored by its Levenshtein
    distance to TARGET, appended as a (distance, string) tuple.  The
    list is mutated in place, sorted by fitness, and also returned.
    """
    target_length = len(TARGET)
    for _ in range(len(initialpop), POP_SIZE):
        candidate = givemestring(target_length)
        initialpop.append((distance(candidate, TARGET), candidate))
    initialpop.sort()
    return initialpop
Esempio n. 30
0
def check_perms(names1, names2):
  """Try to match the two name lists by dropping one part of names1.

  For each part of names1, remove every occurrence of it and compare
  the joined remainder to the joined names2; True as soon as one such
  removal brings the Levenshtein distance within `thres`.
  """
  joined2 = ' '.join(names2)
  for omitted in names1:
    remaining = [part for part in names1 if part != omitted]
    if distance(' '.join(remaining), joined2) <= thres:
      return True
  return False
Esempio n. 31
0
def safe_distance(a, b):
    """Levenshtein distance with guards for abbreviation-style inputs.

    Dots are stripped first.  Identical strings cost 0.  A one-letter
    string matching the first letter of a long (>3 chars) counterpart
    counts as an initial and costs 0.  Empty or very short strings are
    penalised with max(len)+1 so they never look like good matches.
    """
    a, b = RX_DOT.sub('', a), RX_DOT.sub('', b)
    if a == b:
        return 0
    shorter, longer = sorted((a, b), key=len)
    if not shorter:
        return len(longer) + 1
    if len(shorter) == 1 and len(longer) > 3 and a[0] == b[0]:
        return 0
    if len(shorter) < 3:
        return max(len(a), len(b)) + 1
    return distance(a, b)
    def test_levenshtein(self):
        """Document the Levenshtein helpers (distance, ratio, setratio,
        seqratio) by example."""
        eq(distance('a', 'ab'), 1)  # number of additions, deletions, updates

        eq(ratio('a', 'b'), 0)  # in [0, 1]
        eq(ratio('a', 'a'), 1)

        eq(setratio(['a', 'b'], ['b', 'a']),
           1.0)  # in [0, 1] compares two sets by best fit, order doesnt matter
        eq(setratio(['c', 'd'], ['b', 'a']),
           0)  # in [0, 1] compares two sets by best fit, order doesnt matter

        eq(seqratio(['a', 'b'], ['b', 'a']), 0.5)  # in [0, 1]
        eq(seqratio(['a', 'b'], ['a', 'b']), 1.0)  # in [0, 1]
        eq(seqratio(['a'], ['a', 'b']), 2 / 3)
Esempio n. 33
0
def find_nearest(title, title_dict):
    """Look `title` up in title_dict, tolerating small misspellings.

    Exact hits return immediately.  Otherwise the key with the smallest
    length-normalised Levenshtein distance wins, provided that distance
    is below EDIT_DISTANCE_RATIO_THRESHOLD.  Returns the mapped value,
    or None (also when title itself is None).
    """
    if title is None:
        return None
    if title in title_dict:
        return title_dict[title]

    best_value = None
    best_ratio = EDIT_DISTANCE_RATIO_THRESHOLD
    title_len = len(title)
    for candidate, value in title_dict.items():
        ratio = distance(title, candidate) / max(title_len, len(candidate))
        if ratio < best_ratio:
            best_ratio = ratio
            best_value = value
    return best_value
Esempio n. 34
0
def compare_metadata(prev, potential, mood):
  """Field-by-field difference of two metadata records, weighted by
  mood['metadata'].

  prev/potential -- dicts whose 'metadata' entry is a list of string
  fields.  Index 5 is a year: compared numerically (absolute
  difference; values not exactly 4 chars count as 0).  Other fields use
  Levenshtein distance normalised by combined length; an empty field is
  penalised with 100000.  Returns the weighted per-field differences.
  (Python 2: relies on unicode(); map() there returns a list.)
  """
  linked = zip(prev['metadata'], potential['metadata'])
  diffs = []
  for idx, items in enumerate(linked):
    old, new = items
    if idx == 5:
      # dates
      olddate, newdate = 0,0
      if len(old) == 4:
        olddate = int(old)
      if len(new) == 4:
        newdate = int(new)
      diffs.append(np.abs(newdate-olddate))
    else:
      try:
        if old != "" and new != "":
          diffs.append(distance(unicode(old), unicode(new)) / float(len(old) + len(new) + 1) )
        else:
          diffs.append(100000)
      except Exception:
        # Fallback: plain byte-string comparison.  Fixes the original's
        # bare `except:` and its `distance((old), str(new))`, which
        # left `old` unconverted.
        diffs.append(distance(str(old), str(new)))
  weighted_diffs = map(lambda x: x[0]*x[1], zip(diffs, mood['metadata']))
  return weighted_diffs
Esempio n. 35
0
def test_khash(xs, D2, attempts=1e6):
    """Randomly sample pairs from xs and check that every pair at
    Levenshtein distance 1 or 2 appears as a key (sorted tuple) in D2.
    Prints a marker for each missing key; returns the number of
    successful checks.  (Python 2 syntax.)
    """
    n = len(xs)
    tests = 0
    for _ in range(int(attempts)):
        i, j = np.random.randint(n, size=2)
        a, b = xs[i], xs[j]
        d = distance(a, b)
        if 0 < d < 3:
            key = tuple(sorted((a,b)))
            if key not in D2:
                print 'fuckyou'
            else:
                tests += 1
    return tests
Esempio n. 36
0
def one_char_typosquatting(s_a='', s_b=''):
    """Detect one-character typosquatting between two strings of length
    at least 4.

    Covered variants:
        in-place change of one char:    paypal -> paypel / paypai / qaypal
        one extra char:                 paypal -> paypal2 / payypal / ppaypal
        one missing char:               paypal -> payal / papal
        two neighbouring chars swapped: paypal -> papyal / payapl

    Returns True when s_b looks like a one-character typo of s_a.
    """
    # Empty or identical inputs cannot be typosquats of each other.
    if not s_a or not s_b or s_a == s_b:
        return False

    # NOTE(review): with `and`, a short string paired with a long one
    # still passes this length gate -- confirm `or` was not intended.
    if len(s_a) < 4 and len(s_b) < 4:
        return False

    # Edit distance 1 covers the change / insert / delete variants.
    if distance(strip_accents(s_a), strip_accents(s_b)) == 1:
        return True

    # A neighbour swap keeps the length, so unequal lengths end it here.
    if len(s_a) != len(s_b):
        return False

    # Swap every adjacent pair in s_a and compare against s_b.
    for pos in range(len(s_a) - 1):
        if s_a[:pos] + s_a[pos + 1] + s_a[pos] + s_a[pos + 2:] == s_b:
            return True

    return False
Esempio n. 37
0
def match_maker(query, unknown):
    """Length-adjusted mismatch count between a query and an unknown.

    This little ditty gives us some wiggle room in identifying our
    indices and any other small targets: since `unknown` may be longer
    than `query`, the raw Levenshtein distance is reduced by the length
    difference so pure overhang is not counted as mismatch.

    :param query: target string (e.g. an index sequence)
    :param unknown: observed string, possibly longer than query
    :return: adjusted mismatch count
    """
    raw_mismatch = distance(query, unknown)
    overhang = len(unknown) - len(query)
    return raw_mismatch - overhang
Esempio n. 38
0
def eval_unnatural(stem, most_freq):
    """Penalise a stem whose pronunciation resembles a high-frequency word.

    Compares Double Metaphone encodings; a phonetic edit distance of
    exactly 1 to any frequent word is treated as "too similar to
    regularise naturally".

    Returns -5.0 when similar, 0 otherwise.
    """
    stem_code = dmeta(stem)[0]
    freq_codes = [dmeta(word)[0] for word in most_freq]
    # Distance of exactly 1 flags a near-homophone.
    if any(distance(stem_code, code) == 1 for code in freq_codes):
        # Phonetically close to a frequent word: unnatural to regularise.
        return -5.0
    # No near-homophone found.
    return 0
Esempio n. 39
0
 def last_chance(self, kw):
     """Best-effort dictionary lookup for *kw*.

     Tries increasing Levenshtein distances (0..3) between the cleaned,
     lowercased keyword and every dictionary entry, returning the index
     of the first entry found at the smallest distance, or 'unk' when
     nothing is within distance 3.
     """
     words = self.dictionnary
     # The normalised keyword never changes; compute it once instead of
     # once per loop iteration.
     cleaned = clean(basify(kw)).lower()
     for radius in range(4):
         hits = np.where(
             np.asarray([distance(cleaned, entry) for entry in words]) == radius
         )
         if len(hits[0]) > 0:
             return hits[0][0]
     # Bug fix: the original set final='unk' once i exceeded 3 but never
     # left the while-loop, so it kept scanning at ever-larger radii
     # (and spun forever on an empty dictionary). Give up explicitly.
     return 'unk'
Esempio n. 40
0
    def suggest_v1(self, word):
        """Return up to 10 spelling suggestions for *word*.

        Candidates come from one- or two-edit variants, are filtered to
        words present in any of the tries, and ranked by Levenshtein
        distance to the original word.
        """
        def intrie(candidate):
            # Membership in any of the three tries qualifies the candidate.
            if (candidate in self.secondary_trie or candidate in self.trie
                    or candidate in self.book_trie):
                return True
            else:
                return False

        # NOTE(review): edits2 is only consulted when edits1 yields nothing
        # (falsy) — presumably intentional; confirm against callers.
        candidates = list(self.edits1(word) or self.edits2(word))
        in_dictionary = list(filter(intrie, candidates))
        suggestions = sorted(in_dictionary, key=lambda x: distance(x, word))
        # Bug fix: `n` was computed but never applied — the full list was
        # returned. Cap the result at the 10 best suggestions as intended.
        n = min(10, len(suggestions))
        return suggestions[:n]
def single_barcode_adj(b, BClist):
    """Match barcode *b* against a reference list, allowing one mismatch.

    Returns a [barcode, status] pair:
      * [b, "match"]   -- exact hit in BClist
      * [ref, "adj"]   -- unambiguous reference within Levenshtein distance 1
      * ["NA", "rm"]   -- ambiguous or too distant; caller should drop it
    """
    if b in BClist:
        return [b, "match"]

    dis_to_refer = [distance(b, referBC) for referBC in BClist]
    sorted_dis = sorted(dis_to_refer)

    # Bug fix: with fewer than two references the original indexed
    # sorted_dis[1] and raised IndexError. A single close reference is
    # accepted directly; otherwise there is nothing to assign.
    if len(sorted_dis) == 1:
        if sorted_dis[0] <= 1:
            return [BClist[0], "adj"]
        return ["NA", "rm"]

    # Accept only when the best hit is close AND strictly better than the
    # runner-up (unambiguous assignment).
    if sorted_dis[0] <= 1 and sorted_dis[1] - sorted_dis[0] >= 1:
        return [BClist[dis_to_refer.index(sorted_dis[0])], "adj"]
    return ["NA", "rm"]
Esempio n. 42
0
def filterLevenshtein(msg, filterWords, englishWords, levenshteinDistance):
    """Replace *msg* with asterisks when it fuzzily matches a filter word.

    A message is filtered when, after stripping non-alphanumerics, it is
    within *levenshteinDistance* edits of a filter word and is not itself
    a valid English word. Otherwise the original message is returned.
    """
    # Strip special characters before comparing.
    filteredMsg = ''.join(e for e in msg if e.isalnum())
    for word in filterWords:
        # Cheap length pre-check before the more expensive edit distance.
        # Bug fix: abs() previously wrapped the entire comparison (a
        # boolean, abs -> 0 or 1), so the guard was effectively always
        # true; the parenthesis now closes around the length difference.
        if abs(len(word) - len(filteredMsg)) <= levenshteinDistance:
            if distance(word, filteredMsg) <= levenshteinDistance:
                # Only censor words that are not real English words.
                if not WordChecker.check_word_exists_in(
                        englishWords, filteredMsg):
                    return generateRandomAsteriskString()
    # No filter matched: pass the message through untouched.
    return msg
Esempio n. 43
0
def _get_closest_string(string,
                        iterable,
                        length_dependant: bool = True,
                        preprocess=lambda s: s.lower()):
    """Return the element of *iterable* closest to *string*.

    Distance is Levenshtein distance after *preprocess*; when
    *length_dependant* is True the distance is normalised by the
    candidate's length so long strings are not unfairly penalised.
    Falls back to the (preprocessed) input when *iterable* is empty.
    """
    string = preprocess(string)
    # Idiom fix: identity comparison against None, not `!= None`.
    candidates = [item for item in iterable if item is not None]
    scored = sorted({
        # max(..., 0.01) avoids division by zero for empty candidates.
        s: distance(string, preprocess(s)) /
        (max(len(preprocess(s)), 0.01) if length_dependant else 1)
        for s in candidates
    }.items(),
                   key=lambda pair: pair[1])
    if len(scored) > 0:
        return scored[0][0]
    return string
Esempio n. 44
0
 def score(
         self, query_meta: Dict, old_match_scores: Dict, match_meta: Dict
 ) -> "np.ndarray":
     """Re-score matches as negative Levenshtein distance to the query text.

     Closer matches (smaller edit distance) receive higher (less negative)
     scores. Returns (match_id, score) rows as a float64 array.
     """
     from Levenshtein import distance

     query_text = query_meta['text']
     new_scores = []
     for match_id in old_match_scores:
         gap = distance(query_text, match_meta[match_id]['text'])
         new_scores.append((match_id, -gap))
     return np.array(new_scores, dtype=np.float64)
Esempio n. 45
0
def weather():      #天气现象查询
    print('\n本脚本只提供查询编码功能')
    while 1:
        dic={'露':'01','霜':'02','结冰':'03','大风':'15','积雪':'16','雾凇':'48','雨凇':56,'冰雹':89,'霾':'05','浮尘':'06','扬沙':'07','轻雾':10,'沙尘暴':31,'雾':42,'毛毛雨':50,'雨':60,'雨夹雪':68,'雪':70,'阵雨':80,'阵性雨夹雪':83,'阵雪':85}
        key=input('\n请输入要查询的天气现象,或输入-1退出:')
        if key=='-1':break
        try:
            print('编码为:',dic[key])
        except KeyError as e:
            print('未找到相关关键词:',e,'\n\n你可能是想写:')
            for i in dic.keys():
                f=distance(key.encode('unicode_escape'),i.encode('unicode_escape'))
                if f<5:
                    print(i,end='  ')
Esempio n. 46
0
def levenshtein(string, candidates):
    """
    Pick the candidate whose length-expanded value best matches *string*.

    Each candidate value is tiled (repeated) out to the length of *string*
    and truncated, then compared by Levenshtein distance; the key with the
    lowest total distance wins (via get_lowest).
    """
    scores = defaultdict(int)
    target_len = len(string)

    for name, pattern in candidates.items():
        # Repeat the pattern to cover the target, then trim the overhang.
        tiled = (pattern * (target_len // len(pattern) + 1))[:target_len]
        scores[name] += distance(string, tiled)

    return get_lowest(scores)
Esempio n. 47
0
def get_reduced_distances(chunk, edit_distance):
    """Filter a chunk of tags so every kept tag is >= edit_distance apart.

    ``chunk`` is ``(base_tag, comparisons)`` where each comparison carries
    a candidate tag at index 1. Candidates are first re-checked against
    the base tag, then greedily accepted only when they are at least
    ``edit_distance`` edits from every tag already kept. The kept set is
    pickled to a temp file (see ``pickler``) whose handle is returned.

    This was a struggle to do simply and without consuming lots of RAM
    (e.g. numpy arrays), but the greedy solution is rather simple.
    """
    # Regenerate and filter the pairwise comparisons: keep only candidates
    # far enough from the base tag.
    good_comparisons = [c for c in chunk[1]
                        if distance(chunk[0], c[1]) >= edit_distance]
    # The base tag itself is always kept (it anchors the comparisons).
    keepers = [chunk[0]]
    # Greedy pass: accept a candidate only if it clears the minimum
    # distance against everything accepted so far.
    # (Cleanup: removed unused locals all_keepers / temp_dist.)
    for tag in good_comparisons:
        skip = False
        for keep in keepers:
            if distance(keep, tag[1]) < edit_distance:
                skip = True
                # No need to continue once we're already < edit_distance.
                break
        if not skip:
            keepers.append(tag[1])
    # pickler writes keepers to disk; see its docstring.
    tf = pickler(keepers)
    return tf
Esempio n. 48
0
def iterate_insde_dict(collected_words_list, handled_ids, word, lemma, pos,examined_word_len, letter_count_dict, search_range, debug = DEBUG):
    """Collect words of a given length whose lemma is close to *lemma*.

    Scans ``letter_count_dict[str(examined_word_len)]`` in random order and
    appends matches (lemma Levenshtein distance in (0, search_range], id not
    yet handled, and — when POS info is available — POS distance within
    range) to *collected_words_list*, recording ids in *handled_ids*.
    Stops after ~10 hits or once the shared list exceeds 12 entries.

    Returns the number of words collected by this call.
    """
    collected_words = 0
    if str(examined_word_len) in letter_count_dict and examined_word_len > 2:
        compare_words = letter_count_dict[str(examined_word_len)]
        random.shuffle(compare_words)
        for word_compare_el in compare_words:
            word_compare = word_compare_el[0]
            # "unknown" POS tags are treated as missing.  # SPECIFIC
            if "unknown" not in word_compare_el[1]:
                pos_compare = word_compare_el[1]
            else:
                pos_compare = None
            lemma_compare = word_compare_el[2]
            comp_word_id = word_compare_el[3]
            comp_ref_id = word_compare_el[4]
            comp_set_id = word_compare_el[5]
            # Tight searches keep the POS radius at 1.
            pos_search_range = search_range if search_range > 3 else 1

            # Perf fix: the lemma distance was previously recomputed up to
            # three times per candidate; compute it once.
            lemma_dist = distance(lemma_compare, lemma)
            in_range = 0 < lemma_dist <= search_range
            fresh = comp_word_id not in handled_ids

            if pos and pos_compare:
                if in_range and distance(pos_compare, pos) <= pos_search_range and fresh:
                    if debug:print("FOUND VS POS", word,word_compare,lemma, lemma_compare, pos,pos_compare)
                    collected_words_list.append({"word_id":comp_word_id, "ref_id":comp_ref_id,"setting_id":comp_set_id, "ngramm": word_compare})
                    handled_ids.append(comp_word_id)
                    collected_words += 1
                    if (collected_words > 10 or len(collected_words_list) > 12):break
            else:
                if in_range and fresh:
                    if debug:print("FOUND NON POS", word,word_compare,lemma, lemma_compare)
                    collected_words_list.append({"word_id":comp_word_id, "ref_id":comp_ref_id,"setting_id":comp_set_id, "ngramm": word_compare})
                    handled_ids.append(comp_word_id)
                    collected_words += 1
                    if (collected_words > 10 or len(collected_words_list) > 12):break
    return collected_words
Esempio n. 49
0
def buildListDict(input_file, distance_stringency, pickleOut):
    """Collapse FASTQ reads by UMI, merging UMIs within a distance threshold.

    Returns ``{umi: ([seqs], [first_header], [first_quality])}``. The UMI is
    assembled from the flanking bases of each read (see slicing below); two
    UMIs within *distance_stringency* edits are treated as one molecule.

    NOTE(review): *pickleOut* is accepted but never used in this function —
    kept for interface compatibility; confirm against callers.
    NOTE(review): unique UMIs after the first are never added to umi_list,
    so later reads are only compared against the first UMI — confirm intent.
    """
    # Dict format: {'UMI': ([seqs], [first_header], [first_quality])}
    sequences = defaultdict(lambda: ([], [], []))
    umi_list = []
    position = 1          # 1-based cursor within the 4-line FASTQ record
    is_unique = True

    # Fix: the input file is now closed via a context manager even when an
    # exception interrupts the parse (previously a plain open/close pair).
    with open(input_file, 'r') as target:
        for line in target:
            if position == 1:
                header = line.rstrip('\n')
                position += 1
            elif position == 2:
                # Assumes UMI flanks the read; absolute offsets from
                # start/end compatible with miSeq/hiSeq read lengths.
                umi_seq = line[0:11] + line.rstrip('\n')[-11:]
                umi_seq = umi_seq.rstrip('\n')
                read_seq = line.rstrip('\n')[6:-6]
                position += 1
            elif position == 3:
                # '+' separator line: nothing to record.
                position += 1
            elif position == 4:
                quality = line.rstrip('\n')
                position = 1

                if not umi_list:
                    umi_list.append(umi_seq)
                else:
                    is_unique = True
                    for umi in umi_list:
                        # First close-enough UMI wins; collapse onto it.
                        if is_unique and distance(umi_seq, umi) <= distance_stringency:
                            is_unique = False
                            umi_seq = umi

                # It is important for duplex collapsing to make sure reads
                # are of the same length; when not duplex collapsing this
                # should always be true.
                if not is_unique and len(sequences[umi_seq][0][0]) == len(read_seq):
                    sequences[umi_seq][0].append(read_seq)
                elif is_unique:
                    sequences[umi_seq][0].append(read_seq)

                # Record header/quality only once per UMI (prevents
                # multiple-entries error).
                if is_unique and not sequences[umi_seq][1]:
                    sequences[umi_seq][1].append(header)
                    sequences[umi_seq][2].append(quality)

    return sequences
Esempio n. 50
0
def attach_UMI_barcode(Read1, Read2, barcodes, mismatch_rate=1):
    """Demultiplex paired reads by barcode, prefixing output with barcode/UMI.

    Read1 carries barcode (bases 0-6) + UMI (bases 6-12); Read2 carries the
    insert. Records whose barcode is within *mismatch_rate* edits of a known
    barcode are appended to ``<barcode>.align.fastq`` with the barcode and
    UMI stitched into the header and '+' lines; non-matching records are
    skipped.
    """
    mismatch_rate = int(mismatch_rate)
    # Fix: all file handles are now closed deterministically. Previously
    # each matching record leaked an output handle (only the last was
    # closed), and f3.close() raised NameError when no read ever matched.
    with open(Read1) as f1, open(Read2) as f2:
        line1 = f1.readline()            # header of first R1 record
        line2 = f2.readline()            # header of first R2 record

        while line1:
            line1 = f1.readline()        # R1 sequence line
            target = line1[0:6]
            mismatch = [
                distance(target, barcodes[idx]) for idx in range(len(barcodes))
            ]

            if min(mismatch) <= mismatch_rate:
                barcode = barcodes[mismatch.index(min(mismatch))]
                UMI = line1[6:12]
                with open(barcode + ".align.fastq", "a") as f3:
                    f3.write("@" + barcode + "," + UMI + "," + line2[1:])
                    f3.write(f2.readline())                       # R2 sequence
                    third_line = f2.readline()
                    f3.write("+" + barcode + "," + UMI + "," + third_line[1:])
                    f3.write(f2.readline())                       # R2 quality
                line2 = f2.readline()    # next R2 header
            else:
                # No barcode match: skip the remaining three R2 lines plus
                # the next header.
                line2 = f2.readline()
                line2 = f2.readline()
                line2 = f2.readline()
                line2 = f2.readline()

            line1 = f1.readline()        # R1 '+' line
            line1 = f1.readline()        # R1 quality line
            line1 = f1.readline()        # next R1 header
Esempio n. 51
0
def calc_distance_matrix(data):
    """Calculate a distance matrix between languages.

    1. Computes the Levenshtein distance for every concept shared between
       two languages (smallest distance across alternative translations).
    2. Normalizes each distance by the largest per-pair word distance.
    3. Averages the normalized distances into one language distance.

    Returns: the distance matrix (pandas DataFrame, language labels on
    both axes).
    """
    d_matrix = np.zeros(shape=(len(data.keys()), len(data.keys())))
    i = 0

    for lang1, lang1_dict in data.items():
        j = 0
        for lang2, lang2_dict in data.items():
            lv_distances = []

            for w_concept1, word1_l in lang1_dict.items():
                word2_l = lang2_dict.get(w_concept1, None)

                # Concept missing in the other language: skip it.
                if word2_l is None:
                    continue

                # A concept may have several translations; keep the
                # smallest cross-product distance.
                dis_list = [distance(w1, w2)
                            for w1 in word1_l for w2 in word2_l]
                lv_distances.append(np.min(dis_list))

            # Guard: with no shared concepts the original crashed on
            # np.max([]) and on the division below. NaN marks "no data".
            if not lv_distances:
                d_matrix[i][j] = np.nan
                j += 1
                continue

            longest_wd = np.max(lv_distances)
            if longest_wd > 0:
                lv_distances = [x / longest_wd for x in lv_distances]

            d_matrix[i][j] = np.sum(lv_distances) / len(lv_distances)
            j += 1
        i += 1

    d = DataFrame(d_matrix)
    d.index = data.keys()
    d.columns = data.keys()
    return d
Esempio n. 52
0
    def distance(self, other):
        """Per-part normalised Levenshtein distances between two items.

        Compares corresponding item parts (the leading Id excluded) and
        returns a list of distances scaled by the longer part's length;
        pairs where either part is empty are skipped.
        """
        # Drop the leading Id from both part lists.
        parts_self = self.get_item_parts()[1:]
        parts_other = other.get_item_parts()[1:]

        dists = []
        for idx in range(len(parts_self)):
            # Treat empty strings as missing values.
            left = parts_self[idx] if parts_self[idx] != "" else None
            right = parts_other[idx] if parts_other[idx] != "" else None
            if (left is not None) and (right is not None):
                dists.append(float(distance(left, right)) / max(len(left), len(right)))

        return dists
Esempio n. 53
0
    def insert(self, word):
        """Insert *word* into the distance-keyed spell tree.

        Returns 'none' when this node was empty, 'not_inserted' for a
        duplicate, otherwise the word of the parent node the new entry
        hangs off.
        """
        # Empty node: claim the word.
        if self.word is None:
            self.word = word
            return 'none'
        # Exact duplicate: nothing to insert.
        if self.word == word:
            return 'not_inserted'
        dist = distance(word, self.word)
        # Descend into the child at the same edit distance, if one exists.
        for child_node, child_dist in self.children:
            if child_dist == dist:
                return child_node.insert(word)
        # No child at this distance: attach a new leaf here.
        self.children.append((SpellTree(word), dist))
        return self.word
Esempio n. 54
0
def find_music(app, root):
    """Open the file under *root* whose name is closest to *app*.

    Walks the directory tree, scores every filename by Levenshtein
    distance to *app*, and launches the best match via ``open`` (macOS).
    """
    tolaunch = ""
    minimum = 999999
    # Cleanup: removed an unused duplicate os.walk() call and the unused
    # `path` alias.
    for dirpath, dirnames, filenames in os.walk(root):
        for filename in filenames:
            filePath = os.path.join(dirpath, filename)
            r = int(distance(filename, app))
            if r < minimum:
                tolaunch = filePath
                minimum = r
                print("actual best : "+str(tolaunch)+ " distance : "+str(minimum))
    # NOTE(review): when the tree is empty this runs `open ""` — confirm
    # whether a guard is wanted.
    os.system("open \""+tolaunch+"\"")
 def get_syn_sim(self, node1, node2):
     """Return the syntactic similarity of two graph nodes.

     Similarity is 1 - Levenshtein(name1, name2) / max(len); two empty
     node names count as identical (similarity 1).

     :param node1: (id, attrs) tuple carrying 'node_name'
     :param node2: (id, attrs) tuple carrying 'node_name'
     :return: similarity in [0, 1]
     """
     name_a = node1[1]['node_name']
     name_b = node2[1]['node_name']
     edit_gap = distance(name_a, name_b)
     if len(name_a) == 0 and len(name_b) == 0:
         return 1
     # Normalise by the longer name so the score stays in [0, 1].
     return 1 - edit_gap / max(len(name_a), len(name_b))
Esempio n. 56
0
def validate_password_dictionary(value):
    """Reject passwords too similar to a configured set of dictionary words.

    Reads PASSWORD_DICTIONARY_EDIT_DISTANCE_THRESHOLD and
    PASSWORD_DICTIONARY from settings; when both are set, raises
    ValidationError for any password within the threshold edit distance
    of a dictionary word. No-op when either setting is absent/falsy.
    """
    threshold = getattr(
        settings, "PASSWORD_DICTIONARY_EDIT_DISTANCE_THRESHOLD", None)
    dictionary = getattr(settings, "PASSWORD_DICTIONARY", None)

    # Both settings must be configured for this validator to apply.
    if not (threshold and dictionary):
        return

    for word in dictionary:
        if distance(text_type(value), text_type(word)) <= threshold:
            raise ValidationError(
                _("Too similar to a restricted dictionary word."),
                code="dictionary_word")
Esempio n. 57
0
def compute_speaker_Levenshtein_distance(speaker_name):
	"""Return the two known speakers closest to *speaker_name*.

	Scores every full name (and every index entry, mapped back onto its
	full name) from APnames.xlsx by Levenshtein distance, keeping the
	smaller score per full name, and returns the best two
	(name, distance) pairs.
	"""
	full_speaker_names = read_names("APnames.xlsx")

	distance_size = {}
	# Pass 1: distance against each full name directly.
	# (Cleanup: dropped the unused enumerate index and commented-out code.)
	for speaker in full_speaker_names['Full Name']:
		distance_size[speaker] = distance(speaker, speaker_name)

	# Pass 2: distance against each index entry, folded back onto the
	# corresponding full name — keep the minimum of the two passes.
	for j, speaker in enumerate(full_speaker_names.index.values):
		dist = distance(speaker, speaker_name)
		full_name = full_speaker_names["Full Name"].iloc[j]
		if full_name in distance_size:
			if dist < distance_size[full_name]:
				distance_size[full_name] = dist
		else:
			distance_size[full_name] = dist

	dist_size_sorted = sorted(distance_size.items(), key = lambda kv: kv[1])
	return dist_size_sorted[:2]
Esempio n. 58
0
 def closest_hexameter_patterns(self, scansion: str) -> list:
     """Find the closest group of matching valid hexameter patterns.

     Only candidates with a matching length (number of syllables) are
     considered; every pattern tied for the minimum edit distance is
     returned with the scansion's original ending restored.
     """
     # Normalise: drop spaces, then foot separators.
     pattern = scansion.replace(" ", "")
     pattern = pattern.replace(self.constants.FOOT_SEPARATOR, "")
     ending = pattern[-1]
     # Swap the final syllable for the 'optional ending' wildcard.
     candidate = pattern[:len(pattern) - 1] + self.constants.OPTIONAL_ENDING
     scored = [(distance(candidate, known), known)
               for known in self.VALID_HEXAMETERS
               if len(known) == len(candidate)]
     if not scored:
         return []
     best = min(score for score, _ in scored)
     # Keep everything tied for best, restoring the original ending.
     return [known[:-1] + ending for score, known in scored if score == best]
Esempio n. 59
0
 def search(self, word):
     """Look up *word*, returning candidate corrections with scores.

     Returns a dict mapping dictionary entries to a distance score
     (0 for an exact case-insensitive hit; otherwise edit distance as a
     percentage of the word length).
     """
     original = word
     word = word.lower()
     candidates = {}
     if (self.inList(word)):
         # Exact (lowercased) hit: perfect score.
         candidates[self.get(word)] = 0
     else:
         edits = self.edits(word)
         for word in edits.values():  # NOTE: rebinds `word` per edit variant
             # Only consider variants that share the original's first letter.
             if (self.inList(word) and word[0] == original[0].lower()):
                 # NOTE(review): both `word` and `original` are rebound here,
                 # so later iterations compare against the lowered values —
                 # looks fragile; confirm this ordering is intended.
                 word, original = self.get(word).lower(), original.lower()
                 d = distance(word, original)
                 l = len(original)
                 # Score: edit distance as a percentage of the word length.
                 candidates[self.get(word)] = d / l * 100
     return candidates
Esempio n. 60
0
def get_LD(i, j):
    '''
    Calculate sequence distance between a pair of Seq objects.

    Equal-length junctions are scored with a gapless global alignment
    (the -50/-50 gap penalties effectively forbid gaps), and distance is
    length minus matches; unequal lengths fall back to plain Levenshtein
    distance.
    '''
    # Different lengths: Levenshtein handles insertions/deletions.
    if i.junc_len != j.junc_len:
        return distance(i.junc, j.junc)

    # Same length: pairwise2 is used to force a 'gapless' comparison.
    identity = pairwise2.align.globalms(i.junc, j.junc, 1, 0, -50, -50,
                                        score_only=True,
                                        one_alignment_only=True)
    # pairwise2 may return a non-float sentinel when nothing aligns.
    if type(identity) != float:
        identity = 0.0
    return i.junc_len - identity