def __init__(self, data):
    '''Store the training data, tabulate its statistics and build the tree.

    `data` should be a list of tuples of the form
    `(feature_dict, classification)`, one tuple per data point.
    `feature_dict` maps feature names to that point's values and
    `classification` is the label of the point.

    Two counting helpers are used throughout:

    * `dfdict` (a defaultdict) behaves like a dict, but reading a missing
      key stores and returns the result of calling the factory it was
      given instead of raising a KeyError.
    * `Counter` is a dict that maps values to integer counts, with
      missing keys counting as zero.

    Attributes set here:

    * `data`, `features`, `classifications` -- the input, and its two
      columns split apart as tuples.
    * `classification_domain` -- set of distinct classifications.
    * `feature_indexes` -- feature names taken from the first data point.
    * `feature_domains` -- feature -> set of values seen.
    * `feature_value_counts` -- feature -> Counter of its values.
    * `feature_classification_counts` -- feature -> value -> Counter of
      the classifications of the points having that value.
    * `classification_counts` -- plain dict: classification -> count.
    '''
    self.data = data

    # zip(*data) transposes the list of 2-tuples into a tuple of all
    # feature dicts and a tuple of all classifications
    feature_rows, labels = zip(*data)
    self.features = feature_rows
    self.classifications = labels
    self.classification_domain = set(labels)
    self.feature_indexes = list(feature_rows[0].keys())

    # the three tables below are attached to self first and then filled
    # through local aliases in the loop that follows
    domains = self.feature_domains = dfdict(set)
    value_counts = self.feature_value_counts = dfdict(Counter)
    class_counts = self.feature_classification_counts = dfdict(
        lambda: dfdict(Counter))

    for feature_dict, label in data:
        for name, observed in feature_dict.items():
            domains[name].add(observed)
            value_counts[name][observed] += 1
            class_counts[name][observed][label] += 1

    self.classification_counts = dict(Counter(labels))
    self.build_tree()
def canCross(self, stones):
    """
    Decide whether the last stone can be reached from position 0.

    The search starts at position 0 with a previous jump of 0. From a
    state whose previous jump was k, the next jump may be k - 1, k or
    k + 1 units, must be strictly positive, and must land on a stone.

    :type stones: List[int]
    :rtype: bool
    """
    from collections import deque

    goal = stones[-1]
    stone_set = set(stones)

    # Breadth-first search over (position, size of the jump that got there).
    seen = {(0, 0)}
    pending = deque([(0, 0)])
    while pending:
        position, last_jump = pending.popleft()
        if position == goal:
            return True
        for jump in (last_jump - 1, last_jump, last_jump + 1):
            # A zero or negative jump never moves forward, so it can never
            # open up a stone that a positive jump would not also reach.
            if jump <= 0:
                continue
            state = (position + jump, jump)
            # Test stone membership first so that positions in the water
            # are never recorded.
            if state[0] in stone_set and state not in seen:
                seen.add(state)
                pending.append(state)
    return False
def __init__(self, Description='No Description', mandatory=False,
             aliases=None, ValidPattern=None):
    """Record the settings of one parameter in `self.Param`.

    `self.Param` is a `dfdict(None)`: with no default factory it raises
    KeyError on unknown keys, like a plain dict. It holds four entries:

    * 'Descrip' -- `Description`
    * 'Must'    -- `mandatory`
    * 'Alias'   -- `aliases`, or a new empty list when `aliases` is falsy
    * 'Pattern' -- `ValidPattern`
    """
    record = dfdict(None)
    record.update({
        'Descrip': Description,
        'Must': mandatory,
        'Alias': aliases or [],
        'Pattern': ValidPattern,
    })
    self.Param = record
def longestPalindrome(self, s):
    """
    Return the length of the longest palindrome that can be assembled
    from the characters of `s`.

    Every character contributes the largest even number of its
    occurrences; if any character occurs an odd number of times, one
    extra character is added for the centre.

    :type s: str
    :rtype: int
    """
    tally = dfdict(int)
    for ch in s:
        tally[ch] += 1

    occurrences = list(tally.values())
    # c - c % 2 rounds each count down to the nearest even number
    paired = sum(c - c % 2 for c in occurrences)
    centre = 1 if any(c % 2 for c in occurrences) else 0
    return paired + centre
def __init__(self, Description='No Description', mandatory=False,
             aliases=None, ValidPattern=None):
    """Build `self.Param`, the record describing one parameter.

    The record is a `dfdict` created with a default factory of None, so
    looking up a key that was never stored raises KeyError. Its entries:
    'Descrip' (`Description`), 'Must' (`mandatory`), 'Alias' (`aliases`,
    replaced by a fresh empty list when falsy) and 'Pattern'
    (`ValidPattern`).
    """
    self.Param = dfdict(
        None,
        Descrip=Description,
        Must=mandatory,
        Alias=aliases or [],
        Pattern=ValidPattern
    )
def __init__(self, **ParamMap):
    """Create the parameter table and register every keyword argument.

    `self.Conf` is a `dfdict` whose default factory is `CParam`, so
    reading an unknown key stores and returns `CParam()`.

    Each keyword argument maps a parameter name to a mapping of keyword
    arguments; the pair is forwarded as `self.AddParam(name, **mapping)`
    after a debug line has been printed for it.
    """
    self.Conf = dfdict(CParam)
    for PName, PRec in ParamMap.items():
        # Parenthesised single-argument print: prints the same text under
        # Python 2 and is also valid syntax under Python 3.
        print("Debug - Add %s > %s" % (PName, PRec))
        self.AddParam(PName, **PRec)
from collections import defaultdict as dfdict

# First input line: two integers, n and m.
n, m = map(int, input().split())

# Map each of the next n input lines to the 1-based positions at which
# that line occurred.
d = dfdict(list)
for i in range(n):
    d[input()].append(i + 1)

# Read the following m lines in one pass; appending via `l1 = l1 + [...]`
# would copy the whole list on every iteration.
l1 = [input() for _ in range(m)]

for i in l1:
    # `in` is used rather than indexing so that looking up an unseen word
    # does not insert an empty list into the defaultdict.
    if i in d:
        print(' '.join(map(str, d[i])))
    else:
        print(-1)

"""
Collections:
Counter -> returns dictionary
defaultdict -> define default type
"""
def count_tags(filename):
    """Count how often each element tag occurs in an XML file.

    The file is parsed incrementally with `ET.iterparse`, which by
    default reports each element once, when its end tag is reached.

    Args:
        filename: path (or file object) accepted by `ET.iterparse`.

    Returns:
        A `dfdict(int)` mapping tag name to number of occurrences.
    """
    count = dfdict(int)
    for _event, elem in ET.iterparse(filename):
        count[elem.tag] += 1
        # The element is complete and already counted; drop its
        # attributes, text and children so that a large file does not
        # accumulate the whole tree in memory.
        elem.clear()
    return count
try:
    import xml.etree.cElementTree as ET
except ImportError:
    # xml.etree.cElementTree was removed in Python 3.9; ElementTree
    # provides the same API.
    import xml.etree.ElementTree as ET
from collections import defaultdict as dfdict
import re
import pprint
import csv


# In[30]:

# The OSM file path. Raw string: the Windows path contains backslash
# sequences such as \U and \N that Python 3 would otherwise try to
# interpret as escapes.
OSM_PATH = r"C:\Udacity\Nano degree\Core Curriculam 4_Data Wrangling\Project\Milwaukee_Map.osm"


# In[31]:

# Finds the last part in the postal code
zip_type_regex = re.compile(r'\b\S+\.?$', re.IGNORECASE)
zip_types = dfdict(set)
# Left empty: defining the expected values is difficult in this case,
# because the area is not precisely defined.
expected_zip = {}


# In[32]:

# Collects zip codes that are not in plain form: any zip_name containing
# '-' or 'WI' is added to zip_types['Extended']. `regex` is referenced
# only by the commented-out lines at the end of the function.
def audit_zip_type(zip_types, zip_name, regex):
    zip_type = 'Extended'
    if '-' in zip_name or 'WI' in zip_name:
        zip_types[zip_type].add(zip_name)
    #m = regex.search(zip_name)
    #if m:
from collections import defaultdict as dfdict import re import pprint # In[4]: #The OSM file path OSM_PATH = "C:\Udacity\Nano degree\Core Curriculam 4_Data Wrangling\Project\Milwaukee_Map.osm" # In[5]: #Finds the very last word in the street name street_type_regex_post = re.compile(r'\b\S+\.?$', re.IGNORECASE) street_types_post = dfdict(set) #Expected street names expected_post = ["Street", "Avenue", "Boulevard", "Drive", "Court", "Place", "Square" "Lane", "Road", "Trail", "Parkway", "Commons", "Way"] #Finds the very first word in the street name street_type_regex_pre = re.compile(r'^[NSEW]\b\.?', re.IGNORECASE) street_types_pre = dfdict(set) #Expected direction form expected_pre = ["North", "South", "East", "West"] # In[6]: #Checks if the element is for street or not def is_street_name(elem):
#Import all the required modules import xml.etree.cElementTree as ET from collections import defaultdict as dfdict import re import pprint import csv # In[2]: #The OSM file path OSM_PATH = "C:\Udacity\Nano degree\Core Curriculam 4_Data Wrangling\Project\Milwaukee_Map.osm" # In[3]: phone_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE) phone_types = dfdict(set) expected_phone = {} def audit_phone_num(phone_types, phone_num, regex, expected_phone): m = regex.search(phone_num) if m: phone_type = m.group() if phone_type not in expected_phone: phone_types[phone_type].add(phone_num) # In[4]: def is_phone_num(elem):