def indexCityId():
    """Give every restaurant a city/id index and persist the results.

    Each restaurant returned by ``getResturants()`` is keyed by the
    concatenation ``r.city + r.id``.  A two-way lookup table is built that
    maps that key to its position (as a string) and the position back to the
    key; the position is stored on the restaurant as ``r.cid``.

    Writes:
        projectData/restaurants2.json  -- the restaurants, serialised through
                                          each object's ``for_json()``.
        projectData/indexCityName.json -- the two-way lookup table.
    """
    rs = getResturants()  # type: list[Restaurant]

    # Two-way table: "<city><id>" -> "<idx>" and "<idx>" -> "<city><id>".
    # When two restaurants share a key, the later position wins.
    indexCitiesName = {}
    for idx, r in enumerate(rs):
        key = r.city + r.id
        indexCitiesName[key] = str(idx)
        indexCitiesName[str(idx)] = key

    # Resolved in a second pass so duplicate keys all get the final index.
    for r in rs:
        r.cid = indexCitiesName[r.city + r.id]

    with open('projectData/restaurants2.json', 'w') as fout:
        json.dump(rs, fout, indent=2, sort_keys=True,
                  default=lambda x: x.for_json())
    with open('projectData/indexCityName.json', 'w') as fout:
        json.dump(indexCitiesName, fout, indent=2)
# Keywords looked for inside cuisine names.  'Mex' is an abbreviation that is
# folded into 'Mexican'.  Order matters: the first matching keyword decides
# the reduced cuisine of a restaurant label.
_REDUCED_CUISINES = ('Indian', 'Mexican', 'Italian', 'French', 'American', 'Mex')


def _matchingCuisines(value):
    """Return the reduced cuisine name for every keyword contained in *value*.

    The result follows the order of ``_REDUCED_CUISINES`` and may contain
    repeats (e.g. 'Mexican' matches both 'Mexican' and 'Mex').
    """
    return ['Mexican' if keyword in ('Mex', 'Mexican') else keyword
            for keyword in _REDUCED_CUISINES
            if keyword in value]


def final2():
    """Collapse cuisine labels to a reduced set and re-index all features.

    Steps:
      1. Load ``projectData/labels.json`` and replace every cuisine that
         contains one of the reduced keywords by the reduced name(s);
         'French-Japanese' and cuisines without a match are kept as they are.
      2. Enumerate the sorted set of all label values and write it to
         ``projectData/reducedFeatures.json``; write the updated labels to
         ``projectData/labels2.json``.
      3. For every restaurant from ``getResturants()``, rewrite matching
         'cuisine' labels to the reduced name and set ``num`` to the new
         feature index (a string, as it would be read back from JSON).
      4. Print every cuisine label and write the restaurants to
         ``projectData/restaurants2.json`` via each object's ``for_json()``.
    """
    with open('projectData/labels.json', 'r') as cin:
        ls = json.load(cin)  # type: dict[str,list]

    newcuisine = set()
    for c in ls['cuisine']:
        matches = _matchingCuisines(c)
        if matches and c != 'French-Japanese':
            newcuisine.update(matches)
        else:
            # No keyword matched, or the explicitly exempted 'French-Japanese'.
            newcuisine.add(c)
    ls['cuisine'] = sorted(newcuisine)

    newUnique = set()
    for values in ls.values():
        newUnique.update(values)

    out = dict(enumerate(sorted(newUnique)))
    with open('projectData/reducedFeatures.json', 'w') as fout:
        json.dump(out, fout, indent=2, sort_keys=True)
    with open('projectData/labels2.json', 'w') as fout:
        json.dump(ls, fout, indent=2, sort_keys=True)

    # Two-way lookup: feature name -> index and index -> feature name.
    # Indices are strings because JSON object keys are strings.
    bway = {}
    for idx, name in out.items():
        bway[name] = str(idx)
        bway[str(idx)] = name

    rs = getResturants()  # type: list[Restaurant]
    for restaurant in rs:
        for label in restaurant.labels:
            if label.label != 'cuisine':
                continue
            matches = _matchingCuisines(label.val)
            if matches:
                # NOTE(review): unlike step 1, 'French-Japanese' is not
                # exempted here and becomes 'French' -- confirm this is
                # intended.
                label.val = matches[0]
                label.num = bway[label.val]

    for restaurant in rs:
        for label in restaurant.labels:
            if label.label == 'cuisine':
                print(label, label.num)

    with open('projectData/restaurants2.json', 'w') as fout:
        json.dump(rs, fout, indent=2, sort_keys=True,
                  default=lambda x: x.for_json())
# --- Disabled experiment: CSV export and reloading of the JSON outputs ------
# o.write('restaurant,features%s'%os.linesep)
# rs = getResturants()  # type: list[Restaurant]
#
# for r in filter(lambda x: x.hasLabelValue(('cuisine',['Indian', 'Mexican', 'Italian', 'French',
#                                                        'American'])),rs):
#     o.write(r.for_csv())
# with open('projectData/reducedFeatures.json','r') as rin:
#     rfs = json.load(rin)
#
# with open('projectData/labels2.json', 'r') as rin:
#     labs = json.load(rin)
#
# print(labs)

# Collect the ``for_np()`` output of every restaurant whose cuisine is one of
# the five reduced classes.  ``for_np()`` yields a pair; presumably
# (feature vector, class label) -- TODO confirm against Restaurant.for_np.
nums = []
rs = getResturants()  # type: list[Restaurant]
rests = []
labs = []
for r in filter(lambda x: x.hasLabelValue(('cuisine', ['Indian', 'Mexican', 'Italian', 'French', 'American'])), rs):
    # Not named ``re``: that would rebind the regex module name used elsewhere
    # in this file.
    features, lab = r.for_np()
    print(r.for_json())
    rests.append(features)
    labs.append(lab)
np_resturants = np.array(rests)

# --- Disabled experiment: decision-tree training ----------------------------
# np_labels = np.array(labs)
# dt = DecisionTreeClassifier()#min_samples_split=20, random_state=99)
# dt.fit(np_resturants,np_labels)