def get_all_business_types():
    """Fill in a Google place-type string for every business not yet cached.

    Loads the challenge set, the id -> (lat, lon) table and the existing
    business-types cache through ``loader``.  For each business missing from
    the cache, every place returned by ``get_places(lat, lon)`` is compared
    to the business name with ``cosine_sim``; the place with the highest
    strictly positive similarity wins.  Its ``types`` (minus the generic
    tags listed below) are stored as one space-separated string with
    underscores turned into spaces, or ``None`` when no place matched.

    Returns nothing; results are written through
    ``loader.dump_business_dict``.

    Raises:
        KeyError: if a business has no entry in the id -> location table.
    """
    # Tags that are dropped before the remaining types are joined.
    ignored_types = (
        'point_of_interest', 'establishment', 'sublocality', 'route',
        'real', 'political', 'of', 'or', 'local', 'locality',
        'intersection', '1',
    )

    businesses = loader.get_challengeset()
    idtoloc = loader.get_idtoloc()
    business_types_dict = loader.get_business_types()
    print("Done {} of {}".format(len(business_types_dict), len(businesses)))

    for business in businesses:
        unique_id = business['unique_id']
        # Test membership on the mapping itself: on Python 2 `.keys()`
        # builds a list, turning this check into a linear scan per business.
        if unique_id in business_types_dict:
            continue

        print(business['name'])
        lat, lon = idtoloc[unique_id]

        # Keep the candidate whose name is most similar to the business
        # name; a similarity of 0 never counts as a match.
        closest_place, best_sim = None, 0
        for place in get_places(lat, lon):
            sim = cosine_sim(place['name'], business['name'])
            if sim > best_sim:
                closest_place, best_sim = place, sim

        if closest_place:
            kept = [t for t in closest_place['types']
                    if t not in ignored_types]
            types = " ".join(kept).replace("_", " ")
        else:
            types = None
        print(types)
        business_types_dict[unique_id] = types

        # NOTE(review): the original indentation of this call was lost.
        # Saving after each new lookup keeps progress if a later lookup
        # fails -- confirm this matches the intended placement.
        loader.dump_business_dict(business_types_dict)
def clean_business_types():
    """Strip uninformative words from every cached business-types string.

    Each value in the cache returned by ``loader.get_business_types`` is
    split on single spaces, words found in the noise list are removed, and
    the remainder is re-joined with single spaces.  ``None`` values become
    the empty string.  The same dict is updated in place and then written
    back through ``loader.dump_business_dict``.
    """
    noise_words = (
        'agency', 'place', 'food', 'point_of_interest', 'establishment',
        'sublocality', 'route', 'real', 'political', 'of', 'or', 'local',
        'locality', 'intersection', '1',
    )

    business_types_dict = loader.get_business_types()
    for key, raw in business_types_dict.items():
        words = ('' if raw is None else raw).split(' ')
        # Joining an empty selection already yields '', so no special case
        # is needed when every word is filtered out.
        business_types_dict[key] = ' '.join(
            word for word in words if word not in noise_words
        )
    loader.dump_business_dict(business_types_dict)
print "Score: ", total print "Total: ", max_potential def getPredictionScoreOfTrainingSet(): """ Returns the score of the classifications on the validation set. """ hand_classified_set = loader.get_hand_classifiedset() algo_classified_set = loader.get_algo_classifiedset() total = 0 scores = [] for uid, actual in hand_classified_set.iteritems(): guess = algo_classified_set[uid] scores.append(util.score_prediction(guess, actual)) total = sum(scores) max_potential = len(hand_classified_set.keys()) * 6 return total / float(max_potential) def writeClassification(business_uid, naics_code): loader.write_row_algo_classified_set(business_uid, naics_code) if __name__ == "__main__": naics_items = loader.get_naics_data_for_level(6) business_types = loader.get_business_types() naics_dict = loader.get_naics_dict() featurizer = None manager.run()
return render_template('classifypage.html', business=business, naics_dict=naics_dict, agent=agent) @app.route('/c/<agent>/<test>/<business_uid>/<naics_code>', methods=['POST']) def classifyBusiness(agent, test, business_uid, naics_code): if request.method == 'POST': loader.write_row_classified_set(business_uid, naics_code) if test != 'test': loader.write_row_hand_classified_set(business_uid, naics_code) return redirect('/classifier/' + agent) else: return abort(405) # 405 Method Not Allowed @app.route('/database') @app.route('/database/<int:page>', methods=['GET']) def databaseView(page=1): businesses = dbh.getBusinessPage(page) return render_template('database.html', businesses=businesses, hand_classified_set=hand_classified_set, algo_classified_set=algo_classified_set, naics_dict=naics_dict) if __name__ == "__main__": naics_items = loader.get_naics_data_for_level(6) hand_classified_set = loader.get_hand_classifiedset() algo_classified_set = loader.get_algo_classifiedset() unclassified_business_ids = loader.get_unclassified_business_ids() business_types = loader.get_business_types() naics_dict = loader.get_naics_dict() app.run(debug=True)
def __init__(self):
    """Load the cached business types and the word2vec model via ``loader``.

    Stores them on ``self.google_types`` and ``self.model`` and prints a
    confirmation once both loads have returned.
    """
    self.google_types = loader.get_business_types()
    self.model = loader.get_word2vecmodel()
    print("Loaded Models")