Esempio n. 1
0
    def __init__(self, data):
        '''Store the data set, gather its statistics, and build the tree.

        `data` should be a list of tuples of the form:
            `(feature_dict, classification)`
        where each tuple is one data point. `feature_dict` is a dictionary
        mapping feature names to the values they take for that data point,
        and `classification` is the label of that data point. `data` is
        iterated more than once, so it must be re-iterable (e.g. a list).

        Several `dfdict` (defaultdict) instances are used below. They act
        like normal dicts, except that reading a missing key stores and
        returns the result of calling the factory they were created with
        instead of raising a KeyError, which keeps the counting code short.

        Counters behave in a similar way, but simply map each value to the
        integer number of times it has been counted.
        '''

        self.data = data
        # zip(*data) splits the list of 2-tuples into two parallel tuples:
        # all feature dicts and all classifications
        self.features, self.classifications = zip(*data)

        self.classification_domain = set(self.classifications)
        # feature names are taken from the first data point only
        self.feature_indexes = list(self.features[0].keys())

        # feature -> set of values seen for that feature
        domains = dfdict(set)
        # feature -> Counter(value -> number of occurrences)
        value_counts = dfdict(Counter)
        # feature -> value -> Counter(classification -> number of data
        # points having that value for that feature)
        class_counts = dfdict(lambda: dfdict(Counter))

        self.feature_domains = domains
        self.feature_value_counts = value_counts
        self.feature_classification_counts = class_counts

        # single pass over the data fills all three tables
        for point, label in self.data:
            for feature, observed in point.items():
                domains[feature].add(observed)
                value_counts[feature][observed] += 1
                class_counts[feature][observed][label] += 1

        # plain dict: classification -> number of data points
        self.classification_counts = dict(Counter(self.classifications))
        self.build_tree()
Esempio n. 2
0
 def canCross(self, stones):
     """
     Decide whether the last stone can be reached from position 0.

     The search starts at position 0 with a previous jump length of 0.
     After a jump of length k, the next jump must have length k - 1, k or
     k + 1, must be strictly forward, and must land on a stone. This means
     the first jump is always of length 1.

     :type stones: List[int]
     :rtype: bool
     :raises IndexError: if ``stones`` is empty
     """
     from collections import deque

     target = stones[-1]
     stone_set = set(stones)
     # A state is (position, length of the jump that landed there).
     start = (0, 0)
     seen = {start}
     # deque gives O(1) pops from the left; list.pop(0) is O(n).
     frontier = deque([start])
     while frontier:
         pos, last = frontier.popleft()
         if pos == target:
             return True
         for step in (last - 1, last, last + 1):
             # Zero or backward steps never make progress; skip them.
             if step <= 0:
                 continue
             landing = pos + step
             # Check the stone first so water positions are never stored.
             if landing not in stone_set:
                 continue
             state = (landing, step)
             if state not in seen:
                 seen.add(state)
                 frontier.append(state)
     return False
Esempio n. 3
0
 def __init__(self,
              Description='No Description',
              mandatory=False,
              aliases=None,
              ValidPattern=None):
     """Build the parameter record stored in ``self.Param``.

     The record holds four entries: 'Descrip' (Description), 'Must'
     (mandatory), 'Alias' (aliases, or a new empty list when aliases is
     falsy) and 'Pattern' (ValidPattern).
     """
     # dfdict(None) has no default factory, so missing keys still raise
     # KeyError exactly like a plain dict.
     self.Param = dfdict(None)
     self.Param.update(
         Descrip=Description,
         Must=mandatory,
         Alias=aliases or [],
         Pattern=ValidPattern,
     )
Esempio n. 4
0
 def longestPalindrome(self, s):
     """
     Return the length of the longest palindrome that can be assembled
     from the characters of ``s`` (comparison is case-sensitive).

     :type s: str
     :rtype: int
     """
     occurrences = dfdict(int)
     for ch in s:
         occurrences[ch] += 1
     # Every pair of equal characters can be mirrored around the centre.
     paired = sum(n - n % 2 for n in occurrences.values())
     # A single character with an odd count may sit alone in the middle.
     centre = 1 if any(n % 2 for n in occurrences.values()) else 0
     return paired + centre
Esempio n. 5
0
 def __init__(self, Description='No Description', mandatory=False,
              aliases=None, ValidPattern=None):
     """Create ``self.Param`` describing one parameter.

     Entries stored: 'Descrip', 'Must', 'Alias' and 'Pattern'. A falsy
     ``aliases`` argument is replaced by a new empty list.
     """
     if not aliases:
         aliases = []
     # No default factory: lookups of unknown keys raise KeyError.
     record = dfdict(None)
     entries = (
         ('Descrip', Description),
         ('Must', mandatory),
         ('Alias', aliases),
         ('Pattern', ValidPattern),
     )
     for key, value in entries:
         record[key] = value
     self.Param = record
Esempio n. 6
0
 def __init__(self, **ParamMap):
     """Create the parameter table and register every supplied parameter.

     Each keyword argument names a parameter; its value is expanded with
     ``**`` into keyword arguments for ``self.AddParam``, so it must be a
     mapping with string keys.
     """
     # Unknown names looked up in self.Conf are created by calling CParam().
     self.Conf = dfdict(CParam)
     for PName, PRec in ParamMap.items():
         # Parenthesised single-argument form: prints the same text under
         # Python 2 and is valid syntax under Python 3.
         print("Debug - Add %s > %s" % (PName, PRec))
         self.AddParam(PName, **PRec)
from collections import defaultdict as dfdict
# First line of input: number of group-A words, number of queries.
n, m = map(int, input().split())

# Map every group-A word to the 1-based line positions where it appears.
d = dfdict(list)
for position in range(1, n + 1):
    d[input()].append(position)

# Read all queries before answering any of them.
l1 = [input() for _ in range(m)]

for query in l1:
    # Membership test (not indexing) so unknown words are not inserted.
    if query in d:
        print(' '.join(map(str, d[query])))
    else:
        print(-1)
"""
Collections:
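Counter -> dict subclass that maps each element to the number of times it occurs
defaultdict -> dict that creates the value for a missing key by calling a default factory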
"""
Esempio n. 8
0
def count_tags(filename):
    """Count how often each element tag occurs in the XML file `filename`.

    The file is parsed incrementally with ET.iterparse. The result is a
    dfdict(int) mapping tag name to occurrence count, so looking up a tag
    that never appeared yields 0.
    """
    tally = dfdict(int)
    # iterparse yields (event, element) pairs; only the element is needed.
    for _event, element in ET.iterparse(filename):
        tally[element.tag] += 1
    return tally
Esempio n. 9
0
import xml.etree.cElementTree as ET
from collections import defaultdict as dfdict
import re
import pprint
import csv

# In[30]:

#The OSM file path
#Raw string: the Windows path contains backslash sequences such as \U and \N
#that Python 3 would otherwise reject as malformed escapes.
OSM_PATH = r"C:\Udacity\Nano degree\Core Curriculam 4_Data Wrangling\Project\Milwaukee_Map.osm"

# In[31]:

#Matches the last whitespace-delimited part of the postal code
zip_type_regex = re.compile(r'\b\S+\.?$', re.IGNORECASE)
#Maps a zip category to the set of raw zip strings found in that category
zip_types = dfdict(set)
#Kept empty: defining the expected zip codes is difficult here because the
#area is not precisely delimited.
expected_zip = {}

# In[32]:


#Records zip codes that are not plain five-digit values: any zip string that
#contains a '-' or the text 'WI' is collected under the 'Extended' category.
def audit_zip_type(zip_types, zip_name, regex):
    """Add `zip_name` to zip_types['Extended'] if it has a '-' or 'WI' in it.

    `zip_types` is mutated in place; nothing is returned. `regex` is
    accepted but not used by this function.
    """
    looks_extended = any(marker in zip_name for marker in ('-', 'WI'))
    if looks_extended:
        zip_types['Extended'].add(zip_name)

#m = regex.search(zip_name)
#if m:
from collections import defaultdict as dfdict
import re
import pprint


# In[4]:

#The OSM file path
#Raw string: the Windows path contains backslash sequences such as \U and \N
#that Python 3 would otherwise reject as malformed escapes.
OSM_PATH = r"C:\Udacity\Nano degree\Core Curriculam 4_Data Wrangling\Project\Milwaukee_Map.osm"


# In[5]:

#Finds the very last word in the street name
street_type_regex_post = re.compile(r'\b\S+\.?$', re.IGNORECASE)
#Maps an unexpected street-type suffix to the street names that use it
street_types_post = dfdict(set)
#Expected street names
#("Square" and "Lane" are separate entries; without the comma between them
#Python joins the adjacent literals into the single string "SquareLane".)
expected_post = ["Street", "Avenue", "Boulevard", "Drive", "Court", "Place", "Square", "Lane", "Road", "Trail",
                 "Parkway", "Commons", "Way"]

#Finds a leading single-letter direction (N, S, E or W, any case), optionally
#followed by a period, at the start of the street name
street_type_regex_pre = re.compile(r'^[NSEW]\b\.?', re.IGNORECASE) 
#Maps an abbreviated direction prefix to the street names that use it
street_types_pre = dfdict(set)
#Expected direction form
expected_pre = ["North", "South", "East", "West"]


# In[6]:

#Checks if the element is for street or not
def is_street_name(elem):    
#Import all the required modules
import xml.etree.cElementTree as ET
from collections import defaultdict as dfdict
import re
import pprint
import csv

# In[2]:

#The OSM file path
#Raw string: the Windows path contains backslash sequences such as \U and \N
#that Python 3 would otherwise reject as malformed escapes.
OSM_PATH = r"C:\Udacity\Nano degree\Core Curriculam 4_Data Wrangling\Project\Milwaukee_Map.osm"

# In[3]:

#Matches the last whitespace-delimited part of the phone number
phone_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)
#Maps each matched trailing part to the set of phone numbers ending with it
phone_types = dfdict(set)
#Kept empty, so no trailing part is treated as already expected
expected_phone = {}


def audit_phone_num(phone_types, phone_num, regex, expected_phone):
    """Group `phone_num` under the text that `regex` finds in it.

    If `regex` matches and the matched text is not in `expected_phone`,
    `phone_num` is added to the set stored at phone_types[matched text].
    `phone_types` is mutated in place; nothing is returned.
    """
    found = regex.search(phone_num)
    if not found:
        return
    matched_text = found.group()
    if matched_text in expected_phone:
        return
    phone_types[matched_text].add(phone_num)


# In[4]:


def is_phone_num(elem):