Ejemplos de defaultdict en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: Sequence_Reconstruction.py Proyecto: hanrick2000/LC-1

    def sequenceReconstruction(self, org, seqs):
        """Tell whether ``org`` is the only ordering reconstructible from ``seqs``.

        Every consecutive pair in each sequence becomes a directed edge; the
        graph is then peeled in topological order for as long as exactly one
        node has no remaining incoming edge.

        :param org: the candidate ordering (list of nodes).
        :param seqs: list of sequences (each a list of nodes, not just pairs).
        :return: True when the peel visits every node seen in ``seqs`` and the
            visiting order equals ``org``; False otherwise.
        """
        successors = defaultdict(list)   # node -> nodes that must follow it
        incoming = defaultdict(int)      # node -> number of incoming edges
        seen = set()                     # every node mentioned in seqs
        for seq in seqs:
            seen.update(seq)
            if seq:
                # Register the head even if nothing ever points at it.
                incoming[seq[0]] += 0
            for src, dst in zip(seq, seq[1:]):
                successors[src].append(dst)
                incoming[dst] += 1

        frontier = [node for node, degree in incoming.items() if degree == 0]
        order = []

        # More than one candidate means the ordering is ambiguous; zero means
        # we are done (or stuck on a cycle).
        while len(frontier) == 1:
            current = frontier.pop()
            order.append(current)
            # defaultdict lookup is safe for nodes without outgoing edges.
            for nxt in successors[current]:
                incoming[nxt] -= 1
                if incoming[nxt] == 0:
                    frontier.append(nxt)
        if len(frontier) > 1:
            return False
        # The length check makes sure no node was left unvisited.
        return len(order) == len(seen) and order == org

Ejemplo n.º 2

0

Mostrar archivo

Archivo: norvig_ibol.py Proyecto: dataartisan/prg

def nreport(neighbors):
    NN, NumN = defaultdict(int), defaultdict(int) ## Nearest, Number of neighbors
    for n in neighbors:
        nn = min(neighbors[n].values() or ['>25'])
        NN[nn] += 1
        for d2 in neighbors[n].values():
            NumN[d2] += 1 
    print
    print "Nearest neighbor counts:", showh(NN)
    print "Number of neighbors at each distance:", showh(NumN)

Ejemplo n.º 3

0

Mostrar archivo

Archivo: norvig_ibol.py Proyecto: dataartisan/prg

def histo(items):
    """Build a histogram from items and/or (item, count) tuples.

    A tuple entry adds its second element to the tally of its first element;
    any other entry adds one to its own tally.

    Returns a ``defaultdict(int)`` mapping item -> accumulated count.
    """
    counts = defaultdict(int)
    for entry in items:
        if not isinstance(entry, tuple):
            counts[entry] += 1
            continue
        counts[entry[0]] += entry[1]
    return counts

Ejemplo n.º 4

0

Mostrar archivo

Archivo: AssignCap.py Proyecto: ausaafnabi/30DaysOfAlgorithms

   def __init__(self):
       """Set up the instance with its default state."""
       # Grouping container: unknown keys start out as an empty list.
       self.caps = defaultdict(list)
       # Fixed cap count used by this instance.
       self.total_caps = 100
       # Starts at zero; presumably a bitmask filled in elsewhere — confirm.
       self.allmask = 0

Ejemplo n.º 5

0

Mostrar archivo

 def __init__(self, file_name, sheet_name = 'Data',
              columns = None, default_columns = False,
              classic_mode = False, **etc):
     """Validate the column titles and turn this object into a concrete writer.

     :param file_name: output path; its extension ('.xls' or '.xlsx') selects
         the writer class unless ``classic_mode`` is set.
     :param sheet_name: name of the sheet to write.
     :param columns: list of column titles; copied when given.
     :param default_columns: fallback column titles used when ``columns`` is
         empty.
     :param classic_mode: when true, delegate to
         ``mzSpreadsheetClassic.XLSheetWriter``.
     :param etc: extra keyword arguments forwarded to XLSWriter / XLSXWriter.
     :raises IOError: when no columns are available or the extension is
         unsupported.
     :raises ValueError: when two column titles are equal ignoring case.
     """
     if columns:
         self.columns = columns[:]
     elif default_columns:
         self.columns = default_columns + (columns or [])
     else:
         raise IOError("Columns not specified and default columns not selected!")

     # Titles are compared case-insensitively. A duplicate exists when the set
     # of lowered titles is SMALLER than the list (the previous '<' comparison
     # could never be true, so duplicates went undetected).
     lowered = [x.lower() for x in self.columns]
     if len(lowered) > len(set(lowered)):
         from collections import defaultdict
         counts = defaultdict(int)
         for col in lowered:
             counts[col] += 1

         raise ValueError("Column titles appear more than once: %s"
                          % [col for col in self.columns if counts[col.lower()] > 1])

     # Re-class the instance to the concrete writer and run its initialiser.
     if classic_mode:
         import mzSpreadsheetClassic
         self.__class__ = mzSpreadsheetClassic.XLSheetWriter
         mzSpreadsheetClassic.XLSheetWriter.__init__(self, file_name, sheet_name,
                                                     columns, default_columns)
     elif file_name.lower().endswith('.xls'):
         self.__class__ = XLSWriter
         XLSWriter.__init__(self, file_name, sheet_name, self.columns, **etc)
     elif file_name.lower().endswith('.xlsx'):
         self.__class__ = XLSXWriter
         XLSXWriter.__init__(self, file_name, sheet_name, self.columns, **etc)
     else:
         raise IOError("Invalid extension on filename %s given to XLSheetWriter." % file_name)

Ejemplo n.º 6

0

Mostrar archivo

Archivo: p650.py Proyecto: leonlan/projecteuler

def p650(N, p):
    """Compute the sum of all divisors of B(n) mod p for n = 1, ..., N.

    For each n the prime exponents of B(n) are accumulated from the
    factorisation table returned by ``n2factors(N)`` and handed to
    ``compute_sum_of_divisors``; the running total is kept reduced mod p.
    """
    total = 0
    factor_table = n2factors(N)
    for n in tqdm(range(1, N + 1)):
        # prime -> exponent in B(n), built from the compact expression
        exponents = defaultdict(int)
        half = n // 2
        parity = (n + 1) % 2  # 1 when n is even, 0 when n is odd

        for i in range(half):
            weight = 2 * (half - i) - parity

            # Factors of (n - i) contribute positively (numerator) ...
            for prime, mult in factor_table[n - i].items():
                exponents[prime] += mult * weight

            # ... and factors of (i + 1) negatively (denominator).
            for prime, mult in factor_table[i + 1].items():
                exponents[prime] -= mult * weight

        total = (total + compute_sum_of_divisors(exponents, p)) % p

    return total

Ejemplo n.º 7

0

Mostrar archivo

Archivo: preprocess.py Proyecto: morganecf/topic-modeling

def findAllWords(data):
    """Count how often each word occurs across all documents.

    Each element of ``data`` must expose an iterable ``words`` attribute.
    Returns a ``defaultdict(int)`` mapping word -> number of occurrences.
    """
    tally = defaultdict(int)
    for document in data:
        for token in document.words:
            tally[token] += 1
    return tally

Ejemplo n.º 8

0

Mostrar archivo

 def __init__(self, actions):
     """Store ``actions`` and initialise the fixed settings of the instance."""
     self.actions = actions
     # Fixed 5x5 size values.
     self.width, self.height = 5, 5
     # Numeric settings (names suggest learning hyper-parameters — confirm
     # against the methods that read them).
     self.learning_rate = 0.01
     self.discount_factor = 0.9
     self.epsilon = 0.1
     # Unknown keys read back as 0.0.
     self.value_tables = defaultdict(float)

Ejemplo n.º 9

0

Mostrar archivo

 def __init__(self,
              datasetroot=kDatasetPath,
              datasetname='training',
              csvname='reference.csv'):
     """Remember the dataset location settings and create empty containers.

     :param datasetroot: stored as ``_datasetroot`` (defaults to the
         module-level ``kDatasetPath``).
     :param datasetname: stored as ``_datasetname``.
     :param csvname: stored as ``_csvname``.
     """
     self._datasetroot, self._datasetname, self._csvname = (
         datasetroot, datasetname, csvname)
     # Containers start empty; they are not filled in by this method.
     self._filelist, self._labels = [], []
     self._cases = defaultdict(list)

Ejemplo n.º 10

0

Mostrar archivo

Archivo: norvig_ibol.py Proyecto: dataartisan/prg

def creport(drange, dcrange):
    """Print a clustering report to stdout (Python 2 print statements).

    Besides its two parameters the function reads names that are not defined
    here and must exist at module level: neighbors, glen, n, cluster, pct,
    showh, showc, margin, diameter and dist.

    drange  -- iterable of ``d`` values; table() prints one row per value.
    dcrange -- iterable of ``dc`` values; table() prints one column per value.
    """
    def table(what, fn):
        # Heading, then a header row of pct(dc, glen) labels, then one row per
        # d holding fn(cluster(neighbors, d, dc)) for every dc. The trailing
        # commas keep the cells of a row on the same output line.
        print "\n" + what
        print ' '*8, ' '.join([' '+pct(dc, glen) for dc in dcrange])
        for d in drange:
            print '%s (%2d)' % (pct(d, glen), d),
            for dc in dcrange:
                print '%5s' % fn(cluster(neighbors, d, dc)),
            print
    print '\nNearest neighbor must be closer than this percentage (places). '
    print 'Each column: all genomes in cluster within this percentage of each other.'
    table("Number of clusters", len)
    cluster1 = cluster(neighbors, 8, 15) ## splits Cleora
    print '\nNumber of clusters of different sizes:', showh(len(c) for c in cluster1)
    # M: margin -> number of clusters; T: margin -> total members of those clusters.
    M, T = defaultdict(int), defaultdict(int)
    for c in cluster1:
        M[margin(c)] += 1; T[margin(c)] += len(c)
    for x in M: print '%d\t%d\t%d'% (x,M[x],T[x])
    print '\nMargins', showh(M)
    # List every cluster whose margin is at most 16.
    for c in cluster1:
        if margin(c) <= 16:
            print showc(c)
    print '\nScatter plot of cluster diameter vs. margin.'
    # The scatter output itself is commented out, so only the heading prints.
    for c in cluster1:
        if diameter(c) > 0:
            pass
            #print '%d\t%d' % (diameter(c), margin(c))
    print '\nDifference from cluster(neighbors, 11, 14):'
    #table(lambda cl: pct(len(cluster1)-compare(cluster1, cl),max(len(cluster1),len(cl))))
    # NOTE(review): the message below contains the typo 'witth'; it is runtime
    # output, so it is left untouched here.
    print '\nNumber of clusters witth more than one species name:'
    #table(lambda cl: sum(nspecies(c) > 1 for c in cl))
    def pct_near_another(clusters, P=1.25):
        # Counts (g, g2) pairs where g2 is a neighbor of g outside g's cluster
        # and dist(g, g2) is below P times the cluster diameter; the count is
        # formatted with pct(total, n).
        total = 0
        for c in clusters:
            d = diameter(c)
            for g in c:
                for g2 in neighbors[g]:
                    if g2 not in c and dist(g, g2) < P*d:
                        total += 1
        return pct(total, n)
    # NOTE(review): f is never called inside creport, and it calls table() with
    # a single argument although table requires (what, fn) — calling f as
    # written would raise TypeError. pct_near_another is only used by f.
    def f(P):
        print '\nPercent of individuals within %.2f*diameter of another cluster.'%P
        table(lambda cl: pct_near_another(cl, P))

Ejemplo n.º 11

0

Mostrar archivo

Archivo: RET_iTP_functions.py Proyecto: axvdstel/RET-iTP

def scorer(strand, orfs, freq_reads):
    """Distribute read frequencies over ORF starts and 'negative' positions.

    strand     -- 'plus' or 'min'; any other value falls through both branches
                  and the function returns None.
    orfs       -- mapping whose keys are ORF start positions and whose values
                  are lists; matching lists are extended in place.
    freq_reads -- object indexed through ``.loc['<pos>-plus']`` /
                  ``.loc['<pos>-min']`` (presumably a pandas object — confirm).

    Reads the module-level names ``genome_l``, ``genome`` and ``revcom``.

    NOTE(review): both return statements name ``orfs_plus`` / ``orfs_min``,
    which are not assigned anywhere in this function; unless they exist as
    globals this raises NameError. Returning ``orfs`` may be what was meant —
    confirm with the callers.
    """

    if strand == 'plus':
        orf_starts_plus = set([x for x in orfs.keys()])
        # Every position in range(genome_l) is pre-seeded with an empty list;
        # the ``lambda:1`` default only applies to keys outside that range.
        negatives_plus = defaultdict(lambda:1, {x:[] for x in range(genome_l)})
        for pos in range(genome_l-1):
            try:
                # Positions pos-4 .. pos+2: any ORF start here blocks this pos.
                no_go = set([pos+x for x in range(-4,3)])
                if pos -2 in orf_starts_plus:
                    orfs[pos-2].extend([x for x in freq_reads.loc['%s-plus' %(pos)]])

                elif len(no_go.intersection(orf_starts_plus)) > 0:
                    continue
                else:
                    # Skip positions whose surrounding window holds a stop codon.
                    if True in (x in genome[pos-3:pos+9] for x in ['TAG', 'TGA', 'TAA']):
                        continue
                    else:
                        negatives_plus[pos].extend([x for x in freq_reads.loc['%s-plus' %(pos)]])
            # NOTE(review): bare except hides every error (including
            # KeyboardInterrupt); the position is printed and skipped.
            except:
                print(pos, strand)
                continue

        return orfs_plus, negatives_plus
    if strand == 'min':
        orf_starts_min = set([x for x in orfs.keys()])
        # Same pre-seeding as in the 'plus' branch.
        negatives_min = defaultdict(lambda:1, {x:[] for x in range(genome_l)})
        for pos in range(genome_l-1):
            try:
                # Positions pos-1 .. pos+5: any ORF start here blocks this pos.
                no_go = set([pos+x for x in range(-1,6)])
                if pos +3 in orf_starts_min:
                    orfs[pos+3].extend([x for x in freq_reads.loc['%s-min' %(pos)]])
                elif len(no_go.intersection(orf_starts_min)) > 0:
                    continue
                else:
                    # Stop codons are searched in revcom() of the window.
                    if True in (x in revcom(genome[pos-9:pos+3]) for x in ['TAG', 'TGA', 'TAA']):
                        continue
                    else:
                        negatives_min[pos].extend([x for x in freq_reads.loc['%s-min' %(pos)]])
            # NOTE(review): bare except, same caveat as in the 'plus' branch.
            except:
                 print(pos, strand)
                 continue
        return orfs_min, negatives_min

Ejemplo n.º 12

0

Mostrar archivo

Archivo: norvig_ibol.py Proyecto: dataartisan/prg

def sreport(species):
    SS = defaultdict(int)
    print
    for s in set(species):
        c = [g for g in range(n) if species[g] == s]
        d = diameter(c)
        if d > 14:
            if d==glen: d = '>25'
            print 'diameter %s for %s (%d elements)' % (d, s, len(c))
        SS[d] += 1
    print 'Diameters of %d labelled clusters: %s' % (len(set(species)), showh(SS))

Ejemplo n.º 13

0

Mostrar archivo

Archivo: utils.py Proyecto: morganecf/topic-modeling

def domain_frequencies(documents):
    """
    Count documents per URL domain.

    Each document must support ``.get("url")``. Documents whose "url" value
    is missing or falsy are ignored; for the rest the domain returned by
    ``get_domain`` is tallied. Returns a ``defaultdict(int)``.
    """
    tally = defaultdict(int)
    for doc in documents:
        url = doc.get("url")
        if not url:
            continue
        tally[get_domain(url)] += 1
    return tally

Ejemplo n.º 14

0

Mostrar archivo

 def topSolution(self, numCourses, prerequisites):
     """Build a course order by running ``self.dfs`` from every course.

     :param numCourses: number of courses, labelled 0 .. numCourses - 1.
     :param prerequisites: iterable of (u, v) pairs; v is appended to the
         adjacency list of u.
     :return: the ``path`` list filled in by ``self.dfs``, or ``[]`` as soon
         as ``self.dfs`` returns a falsy value (presumably a detected cycle —
         confirm against ``dfs``).
     """
     # The original line read 'import collections import defaultdict', which
     # is a syntax error.
     from collections import defaultdict
     graph = defaultdict(list)
     for u, v in prerequisites:
         graph[u].append(v)
     # 0 = Unknown, 1 = visiting, 2 = visited
     visited = [0] * numCourses
     path = []
     for i in range(numCourses):
         if not self.dfs(graph, visited, i, path):
             return []
     return path

Ejemplo n.º 15

0

Mostrar archivo

Archivo: top_artists.py Proyecto: mvimplis2013/port_scanner

    def run(self):
        """Count lines per artist over all inputs and write a TSV summary.

        Every input line must split on whitespace into exactly three fields;
        the second one is the artist. The output receives one
        ``artist<TAB>count`` line per artist.
        """
        plays = defaultdict(int)

        for target in self.input():
            with target.open('r') as in_file:
                for line in in_file:
                    # Unpacking enforces the three-field layout (ValueError otherwise).
                    _, artist, track = line.strip().split()
                    plays[artist] += 1

        with self.output().open('w') as out_file:
            for artist, count in six.iteritems(plays):
                out_file.write('{}\t{}\n'.format(artist, count))

Ejemplo n.º 16

0

Mostrar archivo

Archivo: 1042.py Proyecto: datpham19/leetcode

    def gardenNoAdj(self, N: int, paths: List[List[int]]) -> List[int]:
        """Assign one of the flowers 1-4 to each garden, greedily.

        Gardens are visited in order 1..N; each takes the smallest flower not
        already used by a neighbouring garden.

        :param N: number of gardens, labelled 1..N.
        :param paths: undirected connections given as [a, b] pairs.
        :return: list whose element i is the flower of garden i + 1.
        :raises IndexError: if all four flowers are taken by the neighbours.
        """
        adjacency = defaultdict(list)
        for path in paths:
            a, b = path[0], path[1]
            adjacency[a].append(b)
            adjacency[b].append(a)

        flower = {}
        for garden in range(1, N + 1):
            taken = {flower[nb] for nb in adjacency[garden] if nb in flower}
            free = [colour for colour in (1, 2, 3, 4) if colour not in taken]
            flower[garden] = free[0]

        return [flower[garden] for garden in range(1, len(flower) + 1)]

Ejemplo n.º 17

0

Mostrar archivo

Archivo: 498. Diagonal Traverse.py Proyecto: CCTSAI-Tony/leetcode

 def findDiagonalOrder(self, matrix: List[List[int]]) -> List[int]:
     """Return the elements of ``matrix`` in zig-zag diagonal order.

     Cells are grouped by anti-diagonal (row index + column index). Diagonals
     with an even index are emitted bottom-to-top, odd ones top-to-bottom.

     :param matrix: rectangular list of rows.
     :return: flat list of all elements; ``[]`` for an empty matrix (the
         previous version returned ``None``, contradicting the annotation).
     """
     if not matrix or not matrix[0]:
         return []
     diagonals = defaultdict(list)
     for i, row in enumerate(matrix):
         for j, value in enumerate(row):
             diagonals[i + j].append(value)

     res = []
     for k in sorted(diagonals):
         # Each diagonal was collected top-to-bottom; flip the even ones.
         res.extend(reversed(diagonals[k]) if k % 2 == 0 else diagonals[k])
     return res

Ejemplo n.º 18

0

Mostrar archivo

Archivo: Graphs1.py Proyecto: mehul523/100DaysOfAlgo

 def solve(self, t, E):
     """Return 1 if node ``t`` is reachable from node 1, otherwise 0.

     :param t: target node.
     :param E: iterable of directed edges; ``e[0]`` is the source and
         ``e[1]`` the destination.
     """
     adjacency = defaultdict(list)
     for edge in E:
         adjacency[edge[0]].append(edge[1])

     # Iterative depth-first walk starting at node 1.
     seen = {1}
     pending = [1]
     while pending:
         node = pending.pop()
         for nxt in adjacency[node]:
             if nxt in seen:
                 continue
             seen.add(nxt)
             pending.append(nxt)

     if t in seen:
         return 1
     return 0

Ejemplo n.º 19

0

Mostrar archivo

Archivo: HourRank_30_P1.py Proyecto: vsd550/HackerRank

def solve(names):
    """Return one label per name using a character trie.

    For each name, the label is its shortest prefix that leaves the part of
    the trie built from earlier names. If the whole name is already in the
    trie, the label is the name itself, followed by a space and the number
    of times the name has ended at that node when that number is not 1.
    """
    root = {}
    labels = []
    for name in names:
        cursor = root
        emitted = False
        for end, ch in enumerate(name, start=1):
            if ch not in cursor:
                if not emitted:
                    # First step off the known trie: this prefix is the label.
                    labels.append(name[:end])
                    emitted = True
                cursor[ch] = defaultdict(int)
            cursor = cursor[ch]
        cursor['count'] += 1

        if emitted:
            continue
        seen = cursor['count']
        suffix = '' if seen == 1 else ' ' + str(seen)
        labels.append(name + suffix)
    return labels

Ejemplo n.º 20

0

Mostrar archivo

Archivo: __init__.py Proyecto: Yatagarasu50469/multiplierz

    def __init__(self, report_file, columns, default_columns, **kwargs):
        """Store the report settings and validate the column titles.

        :param report_file: stored as ``file_name``.
        :param columns: list of column titles; copied when non-empty.
        :param default_columns: fallback titles used when ``columns`` is empty.
        :param kwargs: stored unchanged as ``extraArgs``.
        :raises ValueError: when two column titles are equal ignoring case.

        NOTE(review): when both ``columns`` and ``default_columns`` are falsy,
        ``self.columns`` is never assigned and the check below raises
        AttributeError — confirm whether callers can reach that case.
        """
        self.file_name = report_file
        if columns:
            self.columns = columns[:]
        elif default_columns:
            self.columns = default_columns + (columns or [])
        self.extraArgs = kwargs

        # Titles are compared case-insensitively. A duplicate exists when the
        # set of lowered titles is SMALLER than the list (the previous '<'
        # comparison could never be true, so duplicates went undetected).
        lowered = [x.lower() for x in self.columns]
        if len(lowered) > len(set(lowered)):
            from collections import defaultdict
            counts = defaultdict(int)
            for col in lowered:
                counts[col] += 1

            raise ValueError("Column titles appear more than once: %s" %
                             [col for col in self.columns if counts[col.lower()] > 1])

        self.data = []

Ejemplo n.º 21

0

Mostrar archivo

 def invalidTransactions(self, transactions):
     """
     Return the transactions that are possibly invalid.

     A transaction "name,time,amount,city" is reported when its amount
     exceeds 1000, or when the same name has a transaction in a different
     city at most 60 minutes before or after it (60 inclusive on both sides).

     Reported strings are rebuilt from the parsed fields, grouped per user
     and ordered by (time, amount, city) within each user.

     :type transactions: List[str]
     :rtype: List[str]
     """
     users = defaultdict(list)

     for tran in transactions:
         usr, ts, amt, cty = tran.split(',')
         users[usr].append((int(ts), int(amt), cty))

     res = []

     for usr, records in users.items():
         records.sort()
         # records[left:right] is the window of this user's transactions
         # within 60 minutes of the current one; both ends only move forward
         # because the records are sorted by time.
         left = right = 0

         for ts, amt, cty in records:
             if amt > 1000:
                 res.append(','.join([usr, str(ts), str(amt), cty]))
                 continue

             while ts - records[left][0] > 60:
                 left += 1

             # '<= 60' so that a transaction exactly 60 minutes later is still
             # inside the window (the previous '< 60' missed that case).
             while right < len(records) and records[right][0] - ts <= 60:
                 right += 1

             if any(other[2] != cty for other in records[left:right]):
                 res.append(','.join([usr, str(ts), str(amt), cty]))

     return res

Ejemplo n.º 22

0

Mostrar archivo

Archivo: rhyme.py Proyecto: excursus/nlpypes

 def populate(self, pdict: PDict, predicate):
     """Fill ``self.adj`` with the symmetric relation defined by ``predicate``.

     The values of ``pdict.cmu`` are sorted by the reversed stressless
     representation of their first pronunciation. Each word is then compared
     with itself and the words after it in that order, stopping at the first
     one whose last two representation elements differ. Every pair accepted
     by ``predicate`` is recorded in both directions.

     A progress percentage is printed every 1000 words.
     """
     def phones(word):
         # Stressless representation of the word's first pronunciation.
         return word.prns[0].stressless_repr()

     self.adj = defaultdict(set)
     ordered = sorted(
         pdict.cmu.values(),
         key=lambda w: list(reversed(phones(w))))
     total = len(ordered)
     for idx, first in enumerate(ordered):
         if idx % 1000 == 0:
             print(idx * 100.0 / total)
         for second in ordered[idx:]:
             # Sorted by reversed representation, so once the tails differ
             # the inner scan for this word stops.
             if phones(first)[-2:] != phones(second)[-2:]:
                 break
             if not predicate(first, second):
                 continue
             self.adj[first].add(second)
             self.adj[second].add(first)

Ejemplo n.º 23

0

Mostrar archivo

Archivo: RET_iTP_functions.py Proyecto: axvdstel/RET-iTP

def orf_finder(genome):
    """Map every start-codon position to the end of its first in-frame stop.

    ``genome.seq`` is scanned for ATG/GTG/TTG. For each hit at position x,
    the first stop codon (TAG/TGA/TAA) within the next 10000 characters whose
    offset from x is a positive multiple of three closes the ORF, and
    ``orfs[x]`` is set to ``[stop_position + 2]``. Starts without such a stop
    get no entry.

    Returns a ``defaultdict`` without default factory (it behaves like a dict).
    """
    seq = genome.seq
    min_orf = 3
    max_orf = 10000
    orfs = defaultdict()

    for start_match in re.finditer('ATG|GTG|TTG', seq):
        start = start_match.start()
        window = seq[start:start + max_orf]

        for stop_match in re.finditer('TAG|TGA|TAA', window):
            stop = stop_match.start() + start
            length = stop - start
            if length <= 0 or length % 3 != 0:
                continue
            # First in-frame stop: record it when the length is acceptable,
            # then stop scanning for this start either way.
            if min_orf <= length <= max_orf:
                orfs[start] = [stop + 2]
            break
    return orfs

Ejemplo n.º 24

0

Mostrar archivo

Archivo: HourRank_30_P1.py Proyecto: vsd550/HackerRank

def solve(names):
    """Return one label per name using a set of already-seen prefixes.

    A name seen before is labelled "<name> <occurrence number>". A new name
    is labelled with its shortest prefix not yet in the prefix set, or with
    the full name when every prefix is already known. All prefixes of a new
    name are added to the set.
    """
    known_prefixes = set()
    occurrences = defaultdict(int)
    labels = []
    for name in names:
        if name in occurrences:
            occurrences[name] += 1
            labels.append(name + " " + str(occurrences[name]))
            continue

        occurrences[name] = 1
        chosen = None
        for end in range(1, len(name) + 1):
            prefix = name[:end]
            if chosen is None and prefix not in known_prefixes:
                chosen = prefix
            known_prefixes.add(prefix)
        labels.append(name if chosen is None else chosen)
    return labels

Ejemplo n.º 25

0

Mostrar archivo

Archivo: dsAlgo_groupRecordBasedOnAField.py Proyecto: dataAlgorithms/data

        {'date': '07/02/2012', 'address': '5800 E 58TH'}
        {'date': '07/02/2012', 'address': '5645 N RAVENSWOOD'}
        {'date': '07/02/2012', 'address': '1060 W ADDISON'}
07/03/2012
        {'date': '07/03/2012', 'address': '2122 N CLARK'}
07/04/2012
        {'date': '07/04/2012', 'address': '5148 N CLARK'}
        {'date': '07/04/2012', 'address': '1039 W GRANVILLE'}

In [16]:
    ...: from collections import defaultdict


In [18]: %paste

rows_by_date = defaultdict(list)
for row in rows:
    rows_by_date[row['date']].append(row)

## -- End pasted text --

In [19]:
    ...: print(rows_by_date)
defaultdict(<class 'list'>, {'07/02/2012': [{'date': '07/02/2012', 'address': '5
800 E 58TH'}, {'date': '07/02/2012', 'address': '5645 N RAVENSWOOD'}, {'date': '
07/02/2012', 'address': '1060 W ADDISON'}], '07/03/2012': [{'date': '07/03/2012'
, 'address': '2122 N CLARK'}], '07/04/2012': [{'date': '07/04/2012', 'address':
'5148 N CLARK'}, {'date': '07/04/2012', 'address': '1039 W GRANVILLE'}], '07/01/
2012': [{'date': '07/01/2012', 'address': '5412 N CLARK'}, {'date': '07/01/2012'
, 'address': '4801 N BROADWAY'}]})

Ejemplo n.º 26

0

Mostrar archivo

# Fixed: the import keyword was misspelled ('fromm'), a syntax error.
from collections import defaultdict

# (product, income) pairs to be totalled per product.
incomes = [('Books', 1250.00),
           ('Books', 1300.00),
           ('Books', 1420.00),
           ('Tutorials', 560.00),
           ('Tutorials', 630.00),
           ('Tutorials', 750.00),
           ('Courses', 2500.00),
           ('Courses', 2430.00),
           ('Courses', 2750.00),]

# defaultdict(float) starts every product at 0.0, so no key check is needed.
dd = defaultdict(float)
for product, income in incomes:
    dd[product] += income

for product, income in dd.items():
    print(f'Total income for {product}: ${income:,.2f}')

Ejemplo n.º 27

0

Mostrar archivo

Archivo: Wang_Xang_for_HW1_graded.py Proyecto: xw187/Xuchen_Wang_Python

from collections import defaultdict

#Stuart- I had to edit the code myself because I received an indentation error on your documentation in
# quotes. Be careful because python is very particular about indentation. The way you called the print function
# also does not work for python 3. Python 3 calls it differently than 2.7
def anagram(name):
    """Print the largest group(s) of anagrams found in a word-list file.

    :param name: path of a text file with one word per line.

    Words are grouped under the string of their sorted characters. Every
    group whose size equals the largest group size is printed as
    ``<sorted characters> <list of words>``. Nothing is printed for an empty
    file. Returns None.
    """
    # get every word from the file and store them in wordbank
    with open(name) as file:
        wordbank = [line.rstrip() for line in file]

    # key: the characters the words share (sorted); value: every word that
    # is made of exactly those characters
    dict_anagram = defaultdict(list)
    for word in wordbank:
        key = ''.join(sorted(word))
        dict_anagram[key].append(word)

    # find the largest group size (0 when there are no words)
    length = max((len(words) for words in dict_anagram.values()), default=0)

    # print the anagram groups with the largest size
    for key, words in dict_anagram.items():
        if len(words) > length - 1:
            print(key, words)

Ejemplo n.º 28

0

Mostrar archivo

# Plain dictionary: the key has to be initialised by hand before it can be
# incremented.

stats = {}
key = 'my_counter'
if key not in stats:
    stats[key] = 0
stats[key] += 1


# defaultdict: a missing key is created with int() == 0 on first access.
# (Fixed: the import read 'from collections from defaultdict', a syntax error.)
from collections import defaultdict

stats = defaultdict(int)
stats['my_counter'] += 1

Ejemplo n.º 29

0

Mostrar archivo

Archivo: VarianceWeighting.py Proyecto: goelrhea1992/MovieRecommender

#!/usr/bin/python
from math import sqrt
from math import cos
import os
import math
import collections
import codecs
# Fixed: the import keyword was misspelled ('feom'), a syntax error.
from collections import defaultdict

# All relative paths used later resolve against this hard-coded directory.
os.chdir("F:\\college\\Sem7\\CF\\ml-100k\\ml-100k")
# Initial extremes: vmin starts high and vmax low (presumably narrowed later
# while scanning variances — confirm in the rest of the file).
vmin = 1000.0
vmax = 0.0
# No default factory: behaves like a plain dict (missing keys raise KeyError).
movieVariances = defaultdict()
# Missing keys start as an empty list.
movieRatings = defaultdict(list)

def avg(ratings):
  """Return the arithmetic mean of ``ratings`` as a float.

  Returns None for an empty sequence (kept from the previous behaviour).
  The earlier version returned from inside the loop, so it only ever used
  the first rating.
  """
  if len(ratings) == 0:
    return None
  s = 0.0
  for i in ratings:
    s = s + i
  return s / len(ratings)

def variance(ratings):
  """Return the population variance of ``ratings`` as a float.

  Returns None for an empty sequence (kept from the previous behaviour).
  The earlier version returned from inside the loop, so only the first
  rating contributed to the result.
  """
  if len(ratings) == 0:
    return None
  s = 0.0
  a = avg(ratings)
  for i in ratings:
    s = s + ((a-i)**2)
  return s / len(ratings)

def computeNearestNeighbor(users, username):
  """creates a sorted list of users based on their distance to username"""
  distances = []

Ejemplo n.º 30

0

Mostrar archivo

Archivo: day18.py Proyecto: chloeemariee/Advent-of-Code

# Advent of Code 2019 Day 18
# Many-Worlds Interpretation

# planning
# Fixed: 'import defaultdict' names a module that does not exist;
# defaultdict lives in the collections module.
from collections import defaultdict
import sys

# define variables https://topaz.github.io/paste/#XQAAAQCnGQAAAAAAAAAzHIoib6pXbueH4X9F244lVRDcOZab5q1+VXY/ex42qR7D/JhOUAl0PRlKyZmMcX/t+JUQyym/jh2oG/1cutq3qMxmEFpEjHMJSSEEfDZRxC+e6/mi7CaFwh8r1QUUHa86RR8jiUxbzm+MWYJ9+ADHFKF0mdEWUJ5JmYhvst1+9wbHQaSR4QOsA59OhvWDAnlvmnnOG9Pa+cpYBE/81pFfWo5cWA9Z+Y0du2hwZ0o8GZzmXyMprlbe3wWClBSg4wc/YuB9229yePM0JLgzdvtqY15IRQcMxUmyBLDRXv1c2oUHVCuSNwjb90gG22nUDxkFlKCjAdySTfw4ACa/U82jdm/KrgZeigxUi0fbkLvBVB+kRzknSMafKM/aEdhlHAlfBKYP9NW3f5xkLyzRt8Rwfwgn8zsdJIdV0b9v6zWQLlUHRA6tfYB0RBiBKmIHkyjes3V1giRYoq9UyCDFBsmMVeLZ39gdcYLZpyApTvb8eUKZ5/WL9I7xmRUunpNalU00GmebZozPLsu7qeJh/0EOJMQ3yG0fo1gcoO/YsV2TUnYRJ4aFKgRZni0rNtoyhf7UpUdDR+NB1iDWP4omHP8YF1RxA1YcEi2V8YqyhJE7IIOr4dLxSQQZrzGb16K+zqH0jvVAUby9crfDGJgIyx5tsSnOU39Yw4WU4Vs6DT0It8Dr5QAjpFEquTrz0B08/vAk26XEfuMJJOfHVCI0PWNXhS5c2MrhAdSCfFBCVnovAZTXVcQixljtyAHdFsmHMt6eQItROPAAh3AOFHkLEPqBMEawOVQ2c3nYznIaWIf16cDyaj1SlXHM2BkxBQauvjwWzdnlgoEP/HdkDsH4f3FGbkWxiiqMogIbF+G85H/f4IU2wksMiTsRjP7vp33Nsn8Tc8DdEkv6SH5oJ29DZ0HU+aXzV9A69qaRX7R2YYhPkZEbMkuV5dUxwREkJsQmXsHF7zo2L9Ptnw43YwlfNy51kilROISWl2T3XpBs54MGqMuDRNBXTNcMTQrWl2o8g9hOvALc66FuPhp9JXiRRI/Vk9HDs9iaTTXh/gYfWv8vwoQKBTVNFglqL61mO8D3t3HgbDcaiEqsWU+UfiBNY0n0+T+iY/x6iUqKJpTbRtr9BsQTko3kKRc2kPDckqUH2ZqxHeVWrPJHfpjDTYuJHwV44uQXxx0WyxCigoakb67/X6zc8KG5YxMXlFZmW2UvraYDLTf9TP7E1Y69UKu2CwhTqpqqU0v85GmjUcjJmyZDftrLsXlrAlDnDNuoH7BXGCznqRCBcnOMgAvmRcf+HmcPXF3wcaojXlelzwafTINXTLDvmBcGzo1XJY3xQbqA8eLjmR1E3EdwxP0trxLOZn2+Qtbow8pConCwWIwo+fMLJEBWRYyS2BehncUc9TRZQpUvqv+mY+UrDvR3UXnkf5EjHtrPDcdgH3QKjK4F5Q9hc3EYJXkAU9b/8E2Di29uUPFGobaBurkl5jWgDsM2BmIfcR4SmsXsRSdf91D2UTr3Wf6f5UiNrxqblJT985hlaRpTr/nzGjOnbBtEOH1eqv6ksUK5W8/8brMIwsx6NPmBClLB/7NwknRGhl6fD6p4SKyD7Gqj2iKzpCKCmQ46+q9efvzaOpKpc/uIHcWv0Hvu1LS3docaAgSG/nEAp4H1vQEyxww7bSCBoDaHKBUmdDtC2jNqjZz/xc71w4RM9aynhOH2rn5oK42LLtzjdyiT0J+xQo2t7kN2m+jxCl4I6w+f6JgWNoVLU9I1vyd+WiMeft6flc2a7Ntj3BC+6/7EX6Jx3OAFSklMLlmAAx4G35dp0Mbw6U6xSL3/eSif11ntphgyibHa7/PfMFOUM/PzlD76cifC66k9J1ZkaMMadQqfTYNeIQVXtqitU5gxzONwG5ykeaXB6MupQ0c9/yR0esMj35/fKfjTIO
EC5lYvD9trZntGSK9jwGbQxNqqiY0ooXPbE2aFbB/z7fAzycE5QRumLm6Bhfx2t4bSgZRH+YWPB6BWkcLcfwKkKq2onzXfldiwT1GSguUevJSAAV7b2UIiEzY75tUhbzA0oZDlywx8i9FOIWEwcMqAvAlp/km3ARfZGM/lcQOa3DHUhw/D54S8JOhnEu4uqt5L9FiInnZJWyAdmZMTz7sL6pv+QWLyCTtnPTK06roWzgXIKA5kFD/j1LFVxhsYE282FOksj4/NcfUlB0bYvDVf3/A2haXFLrywp+N3qbZJ3FfWGJIERLlXNanxO7a30lO4RoM1DTdAe7rU2gLV6PdDxmh7v1XnkpfshndhahUOVHSwZSfb0YcOrlH/JzqGXwsZm0b5bzv/7GE0GA==
# Module-level containers. Cells never stored in `grid` read back as 0.
grid = defaultdict(int)
# The loop further down stores (x, y) positions here, keyed by lowercase cell.
keys = {}
# NOTE(review): `gate` is not referenced by the visible code, which writes to
# an undefined `doors` instead — confirm which name is intended.
gate = {}
starting_point = []

#find shortest path https://topaz.github.io/paste/#XQAAAQCnGQAAAAAAAAAzHIoib6pXbueH4X9F244lVRDcOZab5q1+VXY/ex42qR7D/JhOUAl0PRlKyZmMcX/t+JUQyym/jh2oG/1cutq3qMxmEFpEjHMJSSEEfDZRxC+e6/mi7CaFwh8r1QUUHa86RR8jiUxbzm+MWYJ9+ADHFKF0mdEWUJ5JmYhvst1+9wbHQaSR4QOsA59OhvWDAnlvmnnOG9Pa+cpYBE/81pFfWo5cWA9Z+Y0du2hwZ0o8GZzmXyMprlbe3wWClBSg4wc/YuB9229yePM0JLgzdvtqY15IRQcMxUmyBLDRXv1c2oUHVCuSNwjb90gG22nUDxkFlKCjAdySTfw4ACa/U82jdm/KrgZeigxUi0fbkLvBVB+kRzknSMafKM/aEdhlHAlfBKYP9NW3f5xkLyzRt8Rwfwgn8zsdJIdV0b9v6zWQLlUHRA6tfYB0RBiBKmIHkyjes3V1giRYoq9UyCDFBsmMVeLZ39gdcYLZpyApTvb8eUKZ5/WL9I7xmRUunpNalU00GmebZozPLsu7qeJh/0EOJMQ3yG0fo1gcoO/YsV2TUnYRJ4aFKgRZni0rNtoyhf7UpUdDR+NB1iDWP4omHP8YF1RxA1YcEi2V8YqyhJE7IIOr4dLxSQQZrzGb16K+zqH0jvVAUby9crfDGJgIyx5tsSnOU39Yw4WU4Vs6DT0It8Dr5QAjpFEquTrz0B08/vAk26XEfuMJJOfHVCI0PWNXhS5c2MrhAdSCfFBCVnovAZTXVcQixljtyAHdFsmHMt6eQItROPAAh3AOFHkLEPqBMEawOVQ2c3nYznIaWIf16cDyaj1SlXHM2BkxBQauvjwWzdnlgoEP/HdkDsH4f3FGbkWxiiqMogIbF+G85H/f4IU2wksMiTsRjP7vp33Nsn8Tc8DdEkv6SH5oJ29DZ0HU+aXzV9A69qaRX7R2YYhPkZEbMkuV5dUxwREkJsQmXsHF7zo2L9Ptnw43YwlfNy51kilROISWl2T3XpBs54MGqMuDRNBXTNcMTQrWl2o8g9hOvALc66FuPhp9JXiRRI/Vk9HDs9iaTTXh/gYfWv8vwoQKBTVNFglqL61mO8D3t3HgbDcaiEqsWU+UfiBNY0n0+T+iY/x6iUqKJpTbRtr9BsQTko3kKRc2kPDckqUH2ZqxHeVWrPJHfpjDTYuJHwV44uQXxx0WyxCigoakb67/X6zc8KG5YxMXlFZmW2UvraYDLTf9TP7E1Y69UKu2CwhTqpqqU0v85GmjUcjJmyZDftrLsXlrAlDnDNuoH7BXGCznqRCBcnOMgAvmRcf+HmcPXF3wcaojXlelzwafTINXTLDvmBcGzo1XJY3xQbqA8eLjmR1E3EdwxP0trxLOZn2+Qtbow8pConCwWIwo+fMLJEBWRYyS2BehncUc9TRZQpUvqv+mY+UrDvR3UXnkf5EjHtrPDcdgH3QKjK4F5Q9hc3EYJXkAU9b/8E2Di29uUPFGobaBurkl5jWgDsM2BmIfcR4SmsXsRSdf91D2UTr3Wf6f5UiNrxqblJT985hlaRpTr/nzGjOnbBtEOH1eqv6ksUK5W8/8brMIwsx6NPmBClLB/7NwknRGhl6fD6p4SKyD7Gqj2iKzpCKCmQ46+q9efvzaOpKpc/uIHcWv0Hvu1LS3docaAgSG/nEAp4H1vQEyxww7bSCBoDaHKBUmdDtC2jNqjZz/xc71w4RM9aynhOH2rn5oK42LLtzjdyiT0J+xQo2t7kN2m+jxCl4I6w+f6JgWNoVLU9I1vyd+WiMeft6flc2a7Ntj3BC+6/7EX6Jx3OAFSklMLlmAAx4G35dp0Mbw6U6xSL3/eSif11ntphgyibHa7/PfMFOUM/PzlD76cifC66k9J1ZkaMMadQqfTYNeIQVXtqitU5gxzONwG5ykeaXB6MupQ0c9/yR0esMj35/fKfjTI
OEC5lYvD9trZntGSK9jwGbQxNqqiY0ooXPbE2aFbB/z7fAzycE5QRumLm6Bhfx2t4bSgZRH+YWPB6BWkcLcfwKkKq2onzXfldiwT1GSguUevJSAAV7b2UIiEzY75tUhbzA0oZDlywx8i9FOIWEwcMqAvAlp/km3ARfZGM/lcQOa3DHUhw/D54S8JOhnEu4uqt5L9FiInnZJWyAdmZMTz7sL6pv+QWLyCTtnPTK06roWzgXIKA5kFD/j1LFVxhsYE282FOksj4/NcfUlB0bYvDVf3/A2haXFLrywp+N3qbZJ3FfWGJIERLlXNanxO7a30lO4RoM1DTdAe7rU2gLV6PdDxmh7v1XnkpfshndhahUOVHSwZSfb0YcOrlH/JzqGXwsZm0b5bzv/7GE0GA==

# NOTE(review): this fragment reads `lines` and writes `doors`, neither of
# which is defined in the visible snippet, and the `return` at the end is not
# inside any function — presumably the enclosing `def` line was lost. Confirm
# against the original file before running.
for y, row in enumerate(lines):
    for x, cell in enumerate(row):
      # Record every cell under its (x, y) coordinate.
      grid[(x, y)] = cell

      if cell == "@":
        # '@' sets `pos`.
        pos = (x, y)
      elif cell >= "a" and cell <= "z":
        keys[cell] = (x, y)
      elif cell >= "A" and cell <= "Z":
        # Uppercase cells are stored under their lowercase letter.
        doors[cell.lower()] = (x, y)

  return grid, doors, keys, pos

with open("input.txt") as f:
    for line in f:

Ejemplo n.º 31

0

Mostrar archivo

def positionMap(l):
    """Map each distinct element of ``l`` to the list of indices where it occurs.

    Indices are in ascending order. Returns a ``defaultdict(list)``.
    """
    index_lists = defaultdict(list)
    for index, item in enumerate(l):
        index_lists[item].append(index)
    return index_lists

Ejemplo n.º 32

0

Mostrar archivo

Archivo: preprocess.py Proyecto: morganecf/topic-modeling

def findAllLinks(data):
    """Count how often each link occurs across all documents.

    Each element of ``data`` must expose an iterable ``websites`` attribute.
    Returns a ``defaultdict(int)`` mapping link -> number of occurrences.
    """
    tally = defaultdict(int)
    for doc in data:
        for url in doc.websites:
            tally[url] += 1
    return tally

Ejemplo n.º 33

0

Mostrar archivo

Archivo: dict_excersize.py Proyecto: vijey-yuvaraj/python_complete_tutorial-beginners

Expected Output: defaultdict(<class 'set'>, {'Class-VII': {2}, 'Class-VI': {2}, 'Class-VIII': {3}, 'Class-V': {1}})
Click me to see the sample solution

METHOD 1:

# Pair each class name with the number at the same position and print the
# resulting plain dict.
lists = ['Class-V', 'Class-VI', 'Class-VII', 'Class-VIII']
list_two = [1, 2, 3, 4]
c = zip(lists, list_two)
# dict() consumes the zip iterator, so `c` is exhausted afterwards.
print(dict(c))

METHOD 2:

from collections import defaultdict

class_list = ['Class-V', 'Class-VI', 'Class-VII', 'Class-VIII']
id_list = [1, 2, 2, 3]

# Group the ids per class name; each class starts with an empty set.
temp = defaultdict(set)
for c, i in zip(class_list, id_list):
    print(c, i)
    temp[c].add(i)

print(temp)

-----------------------------------------------------------------------------------
# 37. Write a Python program to replace dictionary values with their sum. Go to the editor
# Click me to see the sample solution

def bank_statements(*args):
    """Yield each dict with its two balance entries replaced by their sum.

    For every dict passed in, 'Current Balance' and 'Fixed Account' are
    removed and their sum is stored under 'Overall_Account'. The dicts are
    modified in place and yielded one by one (lazily).

    Raises KeyError when either key is missing from a dict.
    """
    for account in args:
        current = account.pop('Current Balance')
        fixed = account.pop('Fixed Account')
        account['Overall_Account'] = current + fixed
        yield account

Ejemplo n.º 34

0

Mostrar archivo

class Edit:
    """Holds a view together with a list of steps collected for it."""

    # Class-level registry shared by all instances; unknown keys start as {}.
    defer = defaultdict(dict)

    def __init__(self, view):
        """Remember ``view`` and start with an empty list of steps."""
        self.view = view
        self.steps = []

Ejemplo n.º 35

0

Mostrar archivo

# Fixed: 'import defaultdict' names a module that does not exist;
# defaultdict lives in the collections module.
from collections import defaultdict
import datetime
# Create a defaultdict of an integer: monthly_total_rides

monthly_total_rides = defaultdict(int)

# Loop over the list daily_summaries
for daily_summary in daily_summaries:
    # Convert the service_date to a datetime object.
    # strptime is a method of the datetime class, not of the datetime module,
    # so it has to be reached as datetime.datetime.strptime.
    service_datetime = datetime.datetime.strptime(daily_summary[0], '%m/%d/%Y')

    # Add the total rides to the current amount for the month
    monthly_total_rides[service_datetime.month] += int(daily_summary[4])

# Print monthly_total_rides
print(monthly_total_rides)

Ejemplo n.º 36

0

Mostrar archivo

In [8]: from collections import defaultdict

In [9]: age_groups = defaultdict(list)

In [10]: for person in people:
    ...:     age_groups[person.age].append(person)
    ...:     

In [11]: for k in age_groups:
    ...:     print(k, age_groups[k])
    ...:     
40 [40, 40]
18 [18, 18, 18]
42 [42]
25 [25]
23 [23]
80 [80]
67 [67]

Ejemplo n.º 37

0

Mostrar archivo

Archivo: arne-cl_mxpost.py Proyecto: ccavxx/py-search

# -*- coding: utf-8 -*-
# Maximum Entropy Part-of-Speech Tagger for NLTK (Natural Language Toolkit)
# Author: Arne Neumann
# Licence: GPL 3

#__docformat__ = 'epytext en'

"""
A I{part-of-speech tagger} that uses NLTK's build-in L{Maximum Entropy
models<nltk.MaxentClassifier>} to find the most likely I{part-of-speech
tag} (POS) for each word in a given sequence.

The tagger will be trained on a corpus of tagged sentences. For every word
in the corpus, a C{tuple} consisting of a C{dictionary} of features from
the word's context (e.g. preceding/succeeding words and tags, word
prefixes/suffixes etc.) and the word's tag will be generated.
The maximum entropy classifier will learn a model from these tuples that
will be used by the tagger to find the most likely POS-tag for any given
word, even unseen ones.

The tagger and the featuresets chosen for training are implemented as described
in Ratnaparkhi, Adwait (1996). A Maximum Entropy Model for Part-Of-Speech
Tagging. In Proceedings of the ARPA Human Language Technology Workshop. Pages
250-255.

Usage notes:
============

Please install the MEGAM package (http://hal3.name/megam),
otherwise training will take forever.

To use the demo, please install either 'brown' or 'treebank' with::

    import nltk
    nltk.download()

in the Python interpreter. Proper usage of demo() and all other functions and
methods is described below.
"""

import time
import re
from collections import defaultdict

from nltk import TaggerI, FreqDist, untag, config_megam
from nltk.classify.maxent import MaxentClassifier

# Location of the external MEGAM binary; registered with NLTK at import time.
PATH_TO_MEGAM_EXECUTABLE = "/usr/bin/megam"
config_megam(PATH_TO_MEGAM_EXECUTABLE)

class MaxentPosTagger(TaggerI):
 """
 MaxentPosTagger is a part-of-speech tagger based on Maximum Entropy models.
 """
 def train(self, train_sents, algorithm='megam', rare_word_cutoff=5,
           rare_feat_cutoff=5, uppercase_letters='[A-Z]', trace=3,
           **cutoffs):
     """
     MaxentPosTagger trains a Maximum Entropy model from a C{list} of tagged
     sentences.

     @type train_sents: C{list} of C{list} of tuples of (C{str}, C{str})
     @param train_sents: A list of tagged sentences. Each sentence is
     represented by a list of tuples. Each tuple holds two strings, a
     word and its tag, e.g. ('company','NN').

     @type algorithm: C{str}
     @param algorithm: The algorithm that is used by
     L{nltk.MaxentClassifier.train()} to train and optimise the model. It is
     B{strongly recommended} to use the C{LM-BFGS} algorithm provided by the
     external package U{megam<http://hal3.name/megam/>} as it is much faster
     and uses less memory than any of the algorithms provided by NLTK (i.e.
     C{GIS}, C{IIS}) or L{scipy} (e.g. C{CG} and C{BFGS}).

     @type rare_word_cutoff: C{int}
     @param rare_word_cutoff: Words with less occurrences than
     C{rare_word_cutoff} will be treated differently by L{extract_feats}
     than non-rare words (cf. Ratnaparkhi 1996).

     @type rare_feat_cutoff: C{int}
     @param rare_feat_cutoff: ignore features that occur less than
     C{rare_feat_cutoff} during training.

     @type uppercase_letters: C{regex}
     @param uppercase_letters: a regular expression that covers all
     uppercase letters of the language of your corpus (e.g. '[A-Z]' for
     German)

     @type trace: C{int}
     @param trace: The level of diagnostic output to produce. C{0} doesn't
     produce any output, while C{3} will give all the output that C{megam}
     produces plus the time it took to train the model.

     @param cutoffs: Arguments specifying various conditions under
         which the training should be halted. When using C{MEGAM}, only
         C{max_iter} should be relevant. For other cutoffs see
         L{nltk.MaxentClassifier}
           - C{max_iter=v}: Terminate after C{v} iterations.
     """
     self.uppercase_letters = uppercase_letters
     self.word_freqdist = self.gen_word_freqs(train_sents)
     self.featuresets = self.gen_featsets(train_sents,
             rare_word_cutoff)
     self.features_freqdist = self.gen_feat_freqs(self.featuresets)
     self.cutoff_rare_feats(self.featuresets, rare_feat_cutoff)

     # Time only the classifier training itself.
     t1 = time.time()
     self.classifier = MaxentClassifier.train(self.featuresets, algorithm,
                                              trace, **cutoffs)
     t2 = time.time()
     if trace > 0:
         # Single-argument parenthesised form prints the same text under
         # Python 2 and Python 3.
         print("time to train the classifier: {0}".format(round(t2-t1, 3)))
 def gen_feat_freqs(self, featuresets): """        Generates a frequency distribution of joint features (feature, tag)        tuples. The frequency distribution will be used by the tagger to        determine which (rare) features should not be considered during        training (feature cutoff).        This is how joint features look like::            (('t-2 t-1', 'IN DT'), 'NN')            (('w-2', '<START>'), 'NNP')            (('w+1', 'of'), 'NN')        @type featuresets: {list} of C{tuples} of (C{dict}, C{str})        @param featuresets: a list of tuples that contain the featureset of a        word from the training set and its POS tag.        @rtype: C{FreqDist}        @return: a L{frequency distribution<nltk.FreqDist()>},        counting how often each (context information feature, tag) tuple occurs        in the training sentences. """        features_freqdist = defaultdict(int) for (feat_dict, tag) in featuresets: for (feature, value) in feat_dict.items():                features_freqdist[ ((feature, value), tag) ] += 1 return features_freqdist
 def gen_word_freqs(self, train_sents): """        Generates word frequencies from the training sentences for the feature        extractor.        @type train_sents: C{list} of C{list} of tuples of (C{str}, C{str})        @param train_sents: A list of tagged sentences.        @rtype: C{FreqDist}        @return: a L{frequency distribution<nltk.FreqDist()>},        counting how often each word occurs in the training sentences. """        word_freqdist = FreqDist() for tagged_sent in train_sents: for (word, _tag) in tagged_sent:                word_freqdist[word] += 1 return word_freqdist
 def gen_featsets(self, train_sents, rare_word_cutoff): """        Generates featuresets for each token in the training sentences.        @type train_sents: C{list} of C{list} of tuples of (C{str}, C{str})        @param train_sents: A list of tagged sentences.        @type rare_word_cutoff: C{int}        @param rare_word_cutoff: Words with less occurrences than        C{rare_word_cutoff} will be treated differently by L{extract_feats}        than non-rare words (cf. Ratnaparkhi 1996).        @rtype: {list} of C{tuples} of (C{dict}, C{str})        @return:  a list of tuples that contains the featureset of        a token and its POS-tag. """        featuresets = [] for tagged_sent in train_sents:            history = []            untagged_sent = untag(tagged_sent) for (i, (_word, tag)) in enumerate(tagged_sent):                featuresets.append( (self.extract_feats(untagged_sent, i,                    history, rare_word_cutoff), tag) )                history.append(tag) return featuresets

 def cutoff_rare_feats(self, featuresets, rare_feat_cutoff): """        Cuts off rare features to reduce training time and prevent overfitting.        Example        =======            Let's say, the suffixes of this featureset are too rare to learn. >>> featuresets[46712]            ({'suffix(1)': 't',            'prefix(1)': 'L',            'prefix(2)': 'Le',            'prefix(3)': 'Lem',            'suffix(3)': 'ont',            'suffix(2)': 'nt',            'contains-uppercase': True,            'prefix(4)': 'Lemo',            'suffix(4)': 'mont'},            'NNP')            C{cutoff_rare_feats} would then remove the rare joint features::                (('suffix(1)', 't'), 'NNP')                (('suffix(3)', 'ont'), 'NNP')                ((suffix(2)': 'nt'), 'NNP')                (('suffix(4)', 'mont'), 'NNP')            and return a featureset that only contains non-rare features: >>> featuresets[46712]            ({'prefix(1)': 'L',            'prefix(2)': 'Le',            'prefix(3)': 'Lem',            'contains-uppercase': True,            'prefix(4)': 'Lemo'},            'NNP')        @type featuresets: {list} of C{tuples} of (C{dict}, C{str})        @param featuresets: a list of tuples that contain the featureset of a        word from the training set and its POS tag        @type rare_feat_cutoff: C{int}        @param rare_feat_cutoff: if a (context information feature, tag)        tuple occurs less than C{rare_feat_cutoff} times in the training        set, then its corresponding feature will be removed from the        C{featuresets} to be learned. """        never_cutoff_features = set(['w','t'])
 for (feat_dict, tag) in featuresets: for (feature, value) in feat_dict.items():                feat_value_tag = ((feature, value), tag) if self.features_freqdist[feat_value_tag] < rare_feat_cutoff: if feature not in never_cutoff_features:                        feat_dict.pop(feature)

 def extract_feats(self, sentence, i, history, rare_word_cutoff=5): """        Generates a featureset from a word (in a sentence). The features        were chosen as described in Ratnaparkhi (1996) and his Java        software package U{MXPOST<ftp://ftp.cis.upenn.edu/pub/adwait/jmx>}.        The following features are extracted:            - features for all words: last tag (C{t-1}), last two tags (C{t-2              t-1}), last words (C{w-1}) and (C{w-2}), next words (C{w+1}) and              (C{w+2})            - features for non-rare words: current word (C{w})            - features for rare words: word suffixes (last 1-4 letters),              word prefixes (first 1-4 letters),              word contains number (C{bool}), word contains uppercase character              (C{bool}), word contains hyphen (C{bool})        Ratnaparkhi experimented with his tagger on the Wall Street Journal        corpus (Penn Treebank project). He found that the tagger yields        better results when words which occur less than 5 times are treated        as rare. As your mileage may vary, please adjust        L{rare_word_cutoff} accordingly.        Examples        ========            1. This is a featureset extracted from the nonrare (word, tag)            tuple ('considerably', 'RB') >>> featuresets[22356]            ({'t-1': 'VB',            't-2 t-1': 'TO VB',            'w': 'considerably',            'w+1': '.',            'w+2': '<END>',            'w-1': 'improve',            'w-2': 'to'},            'RB')            2. 
A featureset extracted from the rare tuple ('Lemont', 'NN') >>> featuresets[46712]            ({'suffix(1)': 't',            'prefix(1)': 'L',            'prefix(2)': 'Le',            'prefix(3)': 'Lem',            'suffix(3)': 'ont',            'suffix(2)': 'nt',            'contains-uppercase': True,            'prefix(4)': 'Lemo',            'suffix(4)': 'mont'},            'NNP')        @type sentence: C{list} of C{str}        @param sentence: A list of words, usually a sentence.        @type i: C{int}        @param i: The index of a word in a sentence, where C{sentence[0]} would        represent the first word of a sentence.        @type history: C{int} of C{str}        @param history: A list of POS-tags that have been assigned to the        preceding words in a sentence.        @type rare_word_cutoff: C{int}        @param rare_word_cutoff: Words with less occurrences than        C{rare_word_cutoff} will be treated differently than non-rare words        (cf. Ratnaparkhi 1996).        @rtype: C{dict}        @return: a dictionary of features extracted from a word's        context. """        features = {}        hyphen = re.compile("-")        number = re.compile("\d")        uppercase = re.compile(self.uppercase_letters)
 #get features: w-1, w-2, t-1, t-2. #takes care of the beginning of a sentence if i == 0: #first word of sentence            features.update({"w-1": "<START>", "t-1": "<START>", "w-2": "<START>", "t-2 t-1": "<START> <START>"}) elif i == 1: #second word of sentence            features.update({"w-1": sentence[i-1], "t-1": history[i-1], "w-2": "<START>", "t-2 t-1": "<START> %s" % (history[i-1])}) else:            features.update({"w-1": sentence[i-1], "t-1": history[i-1], "w-2": sentence[i-2], "t-2 t-1": "%s %s" % (history[i-2], history[i-1])})
 #get features: w+1, w+2. takes care of the end of a sentence. for inc in [1, 2]: try:                features["w+%i" % (inc)] = sentence[i+inc] except IndexError:                features["w+%i" % (inc)] = "<END>"
 if self.word_freqdist[sentence[i]] >= rare_word_cutoff: #additional features for 'non-rare' words            features["w"] = sentence[i]
 else: #additional features for 'rare' or 'unseen' words            features.update({"suffix(1)": sentence[i][-1:], "suffix(2)": sentence[i][-2:], "suffix(3)": sentence[i][-3:], "suffix(4)": sentence[i][-4:], "prefix(1)": sentence[i][:1], "prefix(2)": sentence[i][:2], "prefix(3)": sentence[i][:3], "prefix(4)": sentence[i][:4]}) if hyphen.search(sentence[i]) != None: #set True, if regex is found at least once                features["contains-hyphen"] = True if number.search(sentence[i]) != None:                features["contains-number"] = True if uppercase.search(sentence[i]) != None:                features["contains-uppercase"] = True
 return features

 def tag(self, sentence, rare_word_cutoff=5): """        Attaches a part-of-speech tag to each word in a sequence.        @type sentence: C{list} of C{str}        @param sentence: a list of words to be tagged.        @type rare_word_cutoff: C{int}        @param rare_word_cutoff: words with less occurrences than        C{rare_word_cutoff} will be treated differently than non-rare words        (cf. Ratnaparkhi 1996).        @rtype: C{list} of C{tuples} of (C{str}, C{str})        @return: a list of tuples consisting of a word and its corresponding        part-of-speech tag. """        history = [] for i in xrange(len(sentence)):            featureset = self.extract_feats(sentence, i, history,                                               rare_word_cutoff)            tag = self.classifier.classify(featureset)            history.append(tag) return zip(sentence, history)

def demo(corpus, num_sents):
    """
    Loads a few sentences from the Brown corpus or the Wall Street Journal
    corpus, trains them, tests the tagger's accuracy and tags an unseen
    sentence.

    The first 10% of the loaded sentences are held out for testing, the
    remainder is used for training. If C{corpus} names neither supported
    corpus, a hint is printed and the function returns without training.

    @type corpus: C{str}
    @param corpus: Name of the corpus to load, either C{brown} or C{treebank}
    (case-insensitive).

    @type num_sents: C{int}
    @param num_sents: Number of sentences to load from a corpus. Use a small
    number, as training might take a while.
    """
    corpus_name = corpus.lower()
    if corpus_name == "brown":
        from nltk.corpus import brown
        tagged_sents = brown.tagged_sents()[:num_sents]
    elif corpus_name == "treebank":
        from nltk.corpus import treebank
        tagged_sents = treebank.tagged_sents()[:num_sents]
    else:
        print("Please load either the 'brown' or the 'treebank' corpus.")
        # Nothing was loaded; stop here instead of failing on the unbound
        # tagged_sents below.
        return

    size = int(len(tagged_sents) * 0.1)
    train_sents, test_sents = tagged_sents[size:], tagged_sents[:size]
    maxent_tagger = MaxentPosTagger()
    maxent_tagger.train(train_sents)

    accuracy = maxent_tagger.evaluate(test_sents)
    # Report the number of sentences actually trained on; the corpus may hold
    # fewer than num_sents sentences.
    print("tagger accuracy (test %i sentences, after training %i): %s"
          % (size, len(train_sents), accuracy))
    print("\n\n")
    print("classify unseen sentence:  %s"
          % (maxent_tagger.tag(["This", "is", "so", "slow", "!"]),))
    print("\n\n")
    print("show the 10 most informative features:")
    # show_most_informative_features() prints the table itself; printing its
    # return value would only add a stray "None".
    maxent_tagger.classifier.show_most_informative_features(10)

if __name__ == '__main__':
    # Script entry point: run the demo on 200 treebank sentences.
    demo("treebank", 200)
    #~ featuresets = demo_debugger("treebank", 10000)
    print("\n\n\n")

Ejemplo n.º 38

0

Mostrar archivo

Archivo: get_snp_gene_hotspots.py Proyecto: dimenwarper/gett

import argparse
import operator
from collections import defaultdict

# Command-line interface: three tab-separated input files and one output
# file, all opened by argparse.
parser = argparse.ArgumentParser()

parser.add_argument('regression_result_file', type=argparse.FileType('r'))
parser.add_argument('snp_coordinate_file', type=argparse.FileType('r'))
parser.add_argument('refgene_file', type=argparse.FileType('r'))
parser.add_argument('outfile', type=argparse.FileType('w'))

args = parser.parse_args()

# First tab-separated column of every line in the regression result file.
snp_list = [l.strip().split('\t')[0] for l in args.regression_result_file]

# Maps the second column of each coordinate line to a list of
# (first column, int(third column)) tuples.
snp_coords = defaultdict(list)
for line in args.snp_coordinate_file:
    fields = line.strip().split('\t')
    try:
        snp_coords[fields[1]].append((fields[0], int(fields[2])))
    except (IndexError, ValueError):
        # Best effort: skip lines with too few columns or a non-integer
        # third column (e.g. a header line).
        continue

gene_counts = defaultdict(int)

line = args.refgene_file.readline()
while line:
    fields = line.strip().split('\t')
    chrom = fields[2].replace('chr','')

Ejemplo n.º 39

0

Mostrar archivo

Archivo: RET_iTP.py Proyecto: axvdstel/RET-iTP

               scores_min, genome)

# NOTE(review): 'coorection' in the log message below looks like a typo for
# 'correction'.
log('Library coorection finished')
################# Score the data on nucleotide level

# RET_iTP_scorer is defined outside this excerpt; presumably the CSV path is
# where the per-nucleotide scores are written -- TODO confirm.
all_scores = RET_iTP_scorer('Analysis/RET-iTP_scores_all.csv')

log('scoring finished')
############### FIND ALL POSSIBLE ORFs

# Run orf_finder over the genome sequence and over revcom(genome.seq) in two
# worker processes. pool.map returns results in input order, so the first
# result belongs to genome.seq and the second to its revcom() counterpart.
pool = multiprocessing.Pool(processes=2)
orfs_plus_coordinates, orfs_min_coordinates_rc = pool.map(
    orf_finder, [genome.seq, revcom(genome.seq)])
pool.close()
# Re-key the second result as genome_l - x -> [genome_l - y[0]]; presumably
# this maps reverse-complement coordinates back onto the forward strand, with
# genome_l being the genome length -- verify.
# NOTE(review): missing keys default to the int 1 while stored values are
# one-element lists -- confirm the mixed value types are intended.
orfs_min_coordinates = defaultdict(lambda: 1, {
    genome_l - x: [genome_l - y[0]]
    for x, y in orfs_min_coordinates_rc.items()
})
log('ORFS located')

############################ Divide data into ORFs and negatives (non-ORFs)

# NOTE(review): these two handles are assigned here, but RET_score_writer
# below receives handle_out_orfs / handle_out_negatives, which are not
# defined in this excerpt -- verify which names are intended.
handle_orf_scores = 'Analysis/ORF_scores.csv'
handle_negatives_scores = 'Analysis/negatives_scores.csv'
# scorer is called once per strand label ('plus' / 'min') and returns a pair
# that is unpacked into ORF scores and negative (non-ORF) scores.
(orfs_plus_scored, negatives_plus_scored) = scorer('plus',
                                                   orfs_plus_coordinates,
                                                   all_scores)
(orfs_min_out_scored, negatives_min_scored) = scorer('min',
                                                     orfs_min_coordinates,
                                                     all_scores)
RET_score_writer(orfs_plus_scored, negatives_plus_scored, orfs_min_out_scored,
                 negatives_min_scored, handle_out_orfs, handle_out_negatives)

Ejemplo n.º 40

0

Mostrar archivo

Archivo: EliFinkelshteyn_alphabet_detector.py Proyecto: ccavxx/py-search

class AlphabetDetector: def __init__(self, no_memory=False): self.alphabet_letters = defaultdict(dict) self.no_memory = no_memory