from database import Firebase

firebase = Firebase()
count = firebase.get_program_count()

# Threshold each program's admission rate is compared against.
param = 0.10

# Ids of the programs whose admission rate is at least `param`.
admission_list = [
    pid for pid in range(0, count)
    if firebase.get_program_admission_rate(pid) >= param
]

# print() with a single argument behaves the same on Python 2 and Python 3.
print(admission_list)
from database import Firebase

firebase = Firebase()
count = firebase.get_program_count()

# City name each program's location record is compared against.
param = "Seattle"

# Ids of the programs whose location record names the requested city.
city_list = [
    pid for pid in range(0, count)
    if firebase.get_program_location(pid)['city'] == param
]

# print() with a single argument behaves the same on Python 2 and Python 3.
print(city_list)
def __init__(self):
    """Open the Firebase backend and record how many programs it holds."""
    backend = Firebase()
    self.firebase = backend
    # Program ids are scanned as 0 .. count - 1 by the filter methods.
    self.count = backend.get_program_count()
from database import Firebase

firebase = Firebase()
count = firebase.get_program_count()

# Upper bound for a program's GRE verbal value.
param = 150

# Ids of the programs whose GRE verbal value is at most `param`.
verbal_list = []
for pid in range(0, count):
    res = firebase.get_program_acad(pid)
    # Programs without a 'gre' entry are skipped.
    if 'gre' in res and res['gre']['verbal'] <= param:
        verbal_list.append(pid)

# print() with a single argument behaves the same on Python 2 and Python 3.
print(verbal_list)
from database import Firebase

firebase = Firebase()
count = firebase.get_program_count()

# Upper bound for a program's in-state fees.
param = 50000

# Ids of the programs whose in-state fees are at most `param`.
fees_in_list = [
    pid for pid in range(0, count)
    if firebase.get_program_fees(pid)['in_state'] <= param
]

# print() with a single argument behaves the same on Python 2 and Python 3.
print(fees_in_list)
class Filters(object):
    """Select program ids by rank, location, cost, scores and admission rate.

    Each ``filter_*`` method reads per-program records through
    ``self.firebase`` for the ids ``0 .. self.count - 1`` and returns a list
    of the ids that satisfy the filter.
    """

    # Ranking sources read from a program's rank record.
    _RANK_SOURCES = ('cwur', 'usnews', 'forbes', 'times')

    # Region key -> state codes belonging to that region.
    # NOTE(review): NY, NJ, PA, OK and TX are not listed under any region --
    # confirm whether that is intentional.
    _REGION_STATES = {
        "north_east": ["CT", "ME", "MA", "NH", "RI", "VT"],
        "south": [
            "DE", "DC", "MD", "AL", "AR", "FL", "GA", "KY", "LA", "MS", "NC",
            "SC", "TN", "VA", "WV",
        ],
        "mid_west": [
            "IL", "IN", "MI", "OH", "WI", "IA", "KS", "MN", "MO", "NE", "ND",
            "SD",
        ],
        "west": [
            "AZ", "NM", "CO", "ID", "MT", "UT", "WY", "AK", "CA", "HI", "NV",
            "OR", "WA",
        ],
    }

    def __init__(self):
        """Open the Firebase backend and record the number of programs."""
        self.firebase = Firebase()
        self.count = self.firebase.get_program_count()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _select(self, fetch, keep):
        """Return the ids whose record ``fetch(pid)`` satisfies ``keep``."""
        return [pid for pid in range(0, self.count) if keep(fetch(pid))]

    @staticmethod
    def _as_list(param):
        """Wrap a lone string in a list; turn any other iterable into a list."""
        if isinstance(param, (str, type(u''))):
            return [param]
        return list(param)

    def _filter_by_rank_source(self, source, param):
        """Return ids whose rank for ``source`` is at most ``int(param)``."""
        limit = int(param)
        return self._select(self.firebase.get_program_rank,
                            lambda rank: rank[source] <= limit)

    def _filter_by_location(self, key, wanted_values):
        """Return ids whose location ``key`` equals one of ``wanted_values``.

        Locations are fetched once and reused for every wanted value. The
        result is grouped by wanted value (in the order given) and then by
        program id, so an id appears once per matching wanted value.
        """
        fetch = self.firebase.get_program_location
        locations = [fetch(pid) for pid in range(0, self.count)]
        return [
            pid
            for wanted in wanted_values
            for pid, location in enumerate(locations)
            if location[key] == wanted
        ]

    def _filter_by_gre(self, section, param):
        """Return ids whose GRE ``section`` value is at most ``int(param)``."""
        limit = int(param)
        # Records without a 'gre' entry never match.
        return self._select(
            self.firebase.get_program_acad,
            lambda acad: 'gre' in acad and acad['gre'][section] <= limit)

    def _filter_by_living(self, key, param):
        """Return ids whose living-cost ``key`` is at most ``int(param)``."""
        limit = int(param)
        return self._select(self.firebase.get_program_living,
                            lambda living: living[key] <= limit)

    # ------------------------------------------------------------------
    # 1. Rank
    # ------------------------------------------------------------------
    def filter_rank(self, param):
        """Return ids ranked at most ``param`` by at least one source.

        Each id appears once, in ascending id order.
        """
        limit = int(param)
        sources = self._RANK_SOURCES
        return self._select(
            self.firebase.get_program_rank,
            lambda rank: any(rank[source] <= limit for source in sources))

    def filter_rank_absolute(self, param):
        """Return ids ranked at most ``param`` by every source."""
        limit = int(param)
        sources = self._RANK_SOURCES
        return self._select(
            self.firebase.get_program_rank,
            lambda rank: all(rank[source] <= limit for source in sources))

    def filter_rank_overall(self, param):
        """Return the ids ``0 .. param`` (inclusive), capped at the program count.

        No record is read: the selection depends only on the id. The cap keeps
        the result inside the id range every other filter scans.
        """
        return list(range(0, min(int(param) + 1, self.count)))

    def filter_rank_usnews(self, param):
        """Return ids whose 'usnews' rank is at most ``param``."""
        return self._filter_by_rank_source('usnews', param)

    def filter_rank_cwur(self, param):
        """Return ids whose 'cwur' rank is at most ``param``."""
        return self._filter_by_rank_source('cwur', param)

    def filter_rank_forbes(self, param):
        """Return ids whose 'forbes' rank is at most ``param``."""
        return self._filter_by_rank_source('forbes', param)

    def filter_rank_times(self, param):
        """Return ids whose 'times' rank is at most ``param``."""
        return self._filter_by_rank_source('times', param)

    # ------------------------------------------------------------------
    # 2. Location
    # Example record: {u'city': u'Philadelphia', u'region_id': 2,
    #                  u'zip': 19104, u'lon': -75.193618, u'state': u'PA',
    #                  u'lat': 39.951002, u'region_name': u'Mid East'}
    # ------------------------------------------------------------------
    def filter_location_state(self, param):
        """Return ids located in any of the states in ``param``.

        ``param`` is an iterable of state codes; a single string is treated
        as one state code.
        """
        return self._filter_by_location('state', self._as_list(param))

    def filter_location_city(self, param):
        """Return ids located in any of the cities in ``param``.

        ``param`` is an iterable of city names; a single string is treated as
        one city name. Each city is compared individually, so a list of names
        matches and a lone string yields each matching id exactly once.
        """
        return self._filter_by_location('city', self._as_list(param))

    def filter_location_zip(self, param):
        """Return ids whose location 'zip' equals ``param``."""
        return self._select(self.firebase.get_program_location,
                            lambda location: location['zip'] == param)

    def filter_location_region(self, param):
        """Return ids located in a state of any region named in ``param``.

        ``param`` is an iterable of region keys ('north_east', 'south',
        'mid_west', 'west'); a single string is treated as one region key.

        Raises:
            KeyError: if a region key is not one of the known regions.
        """
        states = [
            state
            for region in self._as_list(param)
            for state in self._REGION_STATES[region]
        ]
        return self._filter_by_location('state', states)

    # ------------------------------------------------------------------
    # 3. Fees
    # Example record: {u'out_of_state': 63000, u'in_state': 42690}
    # ------------------------------------------------------------------
    def filter_fees_in_state(self, param):
        """Return ids whose in-state fees are at most ``int(param)``."""
        limit = int(param)
        return self._select(self.firebase.get_program_fees,
                            lambda fees: fees['in_state'] <= limit)

    def filter_fees_out_state(self, param):
        """Return ids whose out-of-state fees are at most ``int(param)``."""
        limit = int(param)
        return self._select(self.firebase.get_program_fees,
                            lambda fees: int(fees['out_of_state']) <= limit)

    def filter_budget(self, budget):
        """Return ids whose yearly cost is strictly below ``int(budget)``.

        Yearly cost is the out-of-state fees plus the 'overall' living cost.
        When the program 'length' exceeds 12, the fees are scaled by
        ``12 / length`` first.
        """
        budget = int(budget)
        budget_list = []
        for pid in range(0, self.count):
            duration = float(self.firebase.get_program_details(pid)['length'])
            fees = float(self.firebase.get_program_fees(pid)['out_of_state'])
            # Scale the fees down to 12 units of `length` for longer programs.
            if duration > 12:
                yearly_fees = (12.0 / duration) * fees
            else:
                yearly_fees = fees
            living = float(self.firebase.get_program_living(pid)['overall'])
            if yearly_fees + living < budget:
                budget_list.append(pid)
        return budget_list

    # ------------------------------------------------------------------
    # 4. Score
    # Example record: {u'gre': {u'quant': 167, u'verbal': 159}, u'gpa': 3}
    # ------------------------------------------------------------------
    def filter_gpa(self, param):
        """Return ids whose 'gpa' value is at most ``float(param)``."""
        limit = float(param)
        return self._select(self.firebase.get_program_acad,
                            lambda acad: acad['gpa'] <= limit)

    def filter_gre_verbal(self, param):
        """Return ids whose GRE 'verbal' value is at most ``int(param)``."""
        return self._filter_by_gre('verbal', param)

    def filter_gre_quant(self, param):
        """Return ids whose GRE 'quant' value is at most ``int(param)``."""
        return self._filter_by_gre('quant', param)

    # ------------------------------------------------------------------
    # 5. Living
    # Example record: {u'boarding': 14601, u'books': 1425,
    #                  u'overall': 19351, u'other': 3325}
    # ------------------------------------------------------------------
    def filter_boarding(self, param):
        """Return ids whose 'boarding' cost is at most ``int(param)``."""
        return self._filter_by_living('boarding', param)

    def filter_books(self, param):
        """Return ids whose 'books' cost is at most ``int(param)``."""
        return self._filter_by_living('books', param)

    def filter_overall_expenses(self, param):
        """Return ids whose 'overall' living cost is at most ``int(param)``."""
        return self._filter_by_living('overall', param)

    # ------------------------------------------------------------------
    # 6. Admission Rate
    # ------------------------------------------------------------------
    def filter_admission_rate(self, param):
        """Return ids whose admission rate is at least ``float(param)``."""
        limit = float(param)
        return self._select(self.firebase.get_program_admission_rate,
                            lambda rate: rate >= limit)
class Recommend(object):
    """Class for Selecting Programs Matching Filters."""

    # (attribute holding the requested value, Filters method applied to it),
    # in the order the individual filters are run.
    _INDIVIDUAL_FILTERS = (
        ("overall_rank", "filter_rank_overall"),
        ("usnews", "filter_rank_usnews"),
        ("cwur", "filter_rank_cwur"),
        ("forbes", "filter_rank_forbes"),
        ("times", "filter_rank_times"),
        ("state", "filter_location_state"),
        ("city", "filter_location_city"),
        ("zipcode", "filter_location_zip"),
        ("in_state", "filter_fees_in_state"),
        ("out_of_state", "filter_fees_out_state"),
        ("gpa", "filter_gpa"),
        ("verbal", "filter_gre_verbal"),
        ("quant", "filter_gre_quant"),
        ("boarding", "filter_boarding"),
        ("books", "filter_books"),
        ("overall_expenses", "filter_overall_expenses"),
        ("admission_rate", "filter_admission_rate"),
    )

    # Specializations used by get_user_criteria() when no area of interest
    # was requested.
    _DEFAULT_SPECIALIZATIONS = (
        'information_assurance_cyber_security',
        'business_intelligence',
        'computer_networks',
        'web_application_development',
        'library_science',
        'management_consulting',
        'human_center_design_engineering',
        'information_architecture',
        'software_engineering',
        'data_science_analytics',
        'distributed_systems',
    )

    # Columns kept (in this order) by construct_dataframe().
    _DATAFRAME_COLUMNS = (
        'gpa', 'quant', 'verbal', 'admission_rate', 'business_intelligence',
        'computer_networks', 'data_science_analytics', 'distributed_systems',
        'human_center_design_engineering', 'information_architecture',
        'information_assurance_cyber_security', 'library_science',
        'management_consulting', 'software_engineering',
        'web_application_development', 'in_state', 'out_of_state', 'boarding',
        'books', 'other', 'length', 'cwur', 'forbes', 'times', 'usnews',
    )

    def __init__(self,
                 overall_rank=None,
                 rank=None,
                 usnews=None,
                 cwur=None,
                 forbes=None,
                 times=None,
                 state=None,
                 city=None,
                 zipcode=None,
                 region=None,
                 budget=None,
                 location=None,
                 in_state=None,
                 out_of_state=None,
                 gpa=None,
                 verbal=None,
                 quant=None,
                 boarding=None,
                 books=None,
                 overall_expenses=None,
                 admission_rate=None,
                 aoi=None):
        """Store the requested filter values; ``None`` means "not requested"."""
        self.rank = rank
        self.overall_rank = overall_rank
        self.usnews = usnews
        self.cwur = cwur
        self.forbes = forbes
        self.times = times
        self.state = state
        self.city = city
        self.zipcode = zipcode
        # NOTE(review): `region` is stored but no method in this class reads
        # it; the region filter is driven by `location` -- confirm.
        self.region = region
        self.location = location
        self.in_state = in_state
        self.out_of_state = out_of_state
        self.gpa = gpa
        self.verbal = verbal
        self.quant = quant
        self.budget = budget
        self.boarding = boarding
        self.books = books
        self.overall_expenses = overall_expenses
        self.admission_rate = admission_rate
        self.aoi = aoi
        # List of programs matching at least one of the requested criteria
        self.unique_programs = None
        # One list of matching program ids per filter that was run
        self.common_programs = []
        # program id -> number of filters the program matched
        self.matches = None
        # Filled in by get_user_criteria()
        self.user_selected_criteria = []
        self.specializations_list = []
        # Creating the object of Filters class
        self.filters = Filters()
        # Program count
        self.count = self.filters.count
        self.firebase = Firebase()

    def filter_programs(self):
        """Run the requested filters and summarise which programs matched.

        Returns:
            tuple: ``(common, unique_programs, matches, specializations,
            user_selected_criteria)`` where ``common`` is the set of ids
            matched by every filter that ran, ``unique_programs`` lists the
            ids matched by at least one filter and ``matches`` maps each of
            those ids to the number of filters it matched. When no program
            matched anything, all three fall back to every program id.
        """
        specializations, user_selected_criteria = self.run_selected_filters()

        # `matches` stays None when no filter ran or none matched a program.
        if self.matches is None:
            self.matches = {}
        self.unique_programs = list(self.matches.keys())

        # set.intersection() needs at least one argument.
        if self.common_programs:
            common = set.intersection(*map(set, self.common_programs))
        else:
            common = set()

        # Fall back to all programs if no match is found.
        if not self.unique_programs:
            # Ids run 0 .. count - 1, like every scan in Filters.
            all_programs = list(range(0, self.count))
            self.unique_programs = all_programs
            # Every program gets the same match count.
            self.matches = dict.fromkeys(all_programs, 1)
            common = set(all_programs)

        return (common, self.unique_programs, self.matches, specializations,
                user_selected_criteria)

    def update_results(self, matching_programs):
        """Record one filter's result list and bump per-program match counts."""
        self.common_programs.append(matching_programs)
        for program in matching_programs:
            # The dict is only created once a program actually matched.
            if self.matches is None:
                self.matches = {}
            self.matches[program] = self.matches.get(program, 0) + 1

    def run_selected_filters(self):
        """Apply every requested filter through ``update_results``.

        Returns:
            tuple: ``(specializations, user_selected_criteria)`` -- the
            requested areas of interest and the criteria names implied by the
            grouped ``rank`` / ``budget`` options.
        """
        specializations = []
        user_selected_criteria = []
        filters = self.filters

        # Based on the UI options
        if self.rank is not None:
            for method in ("filter_rank_usnews", "filter_rank_cwur",
                           "filter_rank_forbes", "filter_rank_times"):
                self.update_results(getattr(filters, method)(self.rank))
            user_selected_criteria.extend(
                ["usnews", "cwur", "forbes", "times"])

        if self.budget is not None:
            for method in ("filter_fees_out_state", "filter_fees_in_state",
                           "filter_boarding", "filter_books"):
                self.update_results(getattr(filters, method)(self.budget))
            user_selected_criteria.extend(
                ["in_state", "out_of_state", "boarding", "books"])

        if self.aoi is not None:
            specializations.extend(self.aoi)

        if self.location is not None:
            self.update_results(
                filters.filter_location_region(self.location))

        # Individual filters
        for attribute, method in self._INDIVIDUAL_FILTERS:
            value = getattr(self, attribute)
            if value is not None:
                self.update_results(getattr(filters, method)(value))

        return specializations, user_selected_criteria

    def get_user_criteria(self):
        """Derive criteria names and specializations from the requested options.

        The lists are rebuilt on every call, stored on the instance and
        returned as ``(user_selected_criteria, specializations_list)``.
        """
        criteria = []
        if self.rank is not None:
            criteria.extend(["usnews", "forbes", "times", "cwur"])
        if self.in_state is not None:
            # NOTE(review): all five names are tied to `in_state` here, as in
            # the original statement sequence -- confirm the intended grouping.
            criteria.extend([
                "in_state", "out_of_state", "boarding", "books",
                "admission_rate"
            ])
        if self.gpa is not None:
            criteria.append("gpa")
        if self.verbal is not None:
            criteria.append("verbal")
        if self.quant is not None:
            criteria.append("quant")

        if self.aoi is not None:
            specializations = list(self.aoi)
        else:
            specializations = list(self._DEFAULT_SPECIALIZATIONS)

        self.user_selected_criteria = criteria
        self.specializations_list = specializations
        return (self.user_selected_criteria, self.specializations_list)

    # Getting the json list after the filtering
    def get_filtered_json(self, program_list):
        """Return the detailed record of every program id in ``program_list``."""
        return [
            self.firebase.get_detailed_program(program)
            for program in program_list
        ]

    def construct_dataframe(self, json_list, program_list):
        """Flatten the program records into a DataFrame of feature columns.

        Nested keys are flattened and each column is renamed to the last
        component of its dotted path; the frame is then reduced to the known
        feature columns and a ``program_id`` column is added from
        ``program_list``.
        """
        # pandas exposes json_normalize at top level in newer releases and
        # only under pandas.io.json in older ones.
        normalize = getattr(pd, "json_normalize", None)
        if normalize is None:
            normalize = pd.io.json.json_normalize
        df = normalize(json_list)
        df.columns = df.columns.map(lambda x: x.split(".")[-1])
        df = df.loc[:, list(self._DATAFRAME_COLUMNS)]
        df["program_id"] = program_list
        return df

    def recommend_programs(self):
        """Filter the programs, build their feature frame and rank them."""
        (_common, program_list, _matches, specializations,
         user_selected_criteria) = self.filter_programs()
        json_list = self.get_filtered_json(program_list)
        df = self.construct_dataframe(json_list, program_list)
        match = MatchingAlgo(df, program_list, specializations,
                             user_selected_criteria)
        return match.rank_programs()
def __init__(self,
             overall_rank=None,
             rank=None,
             usnews=None,
             cwur=None,
             forbes=None,
             times=None,
             state=None,
             city=None,
             zipcode=None,
             region=None,
             budget=None,
             location=None,
             in_state=None,
             out_of_state=None,
             gpa=None,
             verbal=None,
             quant=None,
             boarding=None,
             books=None,
             overall_expenses=None,
             admission_rate=None,
             aoi=None):
    """Store the requested filter values and set up the result holders."""
    # Ranking options
    self.overall_rank = overall_rank
    self.rank = rank
    self.usnews = usnews
    self.cwur = cwur
    self.forbes = forbes
    self.times = times

    # Location options
    self.state = state
    self.city = city
    self.zipcode = zipcode
    self.region = region
    self.location = location

    # Cost options
    self.budget = budget
    self.in_state = in_state
    self.out_of_state = out_of_state
    self.boarding = boarding
    self.books = books
    self.overall_expenses = overall_expenses

    # Academic options
    self.gpa = gpa
    self.verbal = verbal
    self.quant = quant
    self.admission_rate = admission_rate
    self.aoi = aoi

    # Result holders: ids matching at least one criterion, the per-filter
    # result lists, and the verbose per-program match information.
    self.unique_programs = None
    self.common_programs = []
    self.matches = None

    # Filter engine, its program count, and a direct backend handle.
    self.filters = Filters()
    self.count = self.filters.count
    self.firebase = Firebase()
from database import Firebase

firebase = Firebase()
count = firebase.get_program_count()

param = 10

# Overall rank is the sum of all ranks. Hence, it does not start from 1 and
# ends at 59. Thus, to get to 10 programs, we need to simply iterate over the
# top 10 program id.
# NOTE(review): the range below is inclusive of `param`, so it yields
# param + 1 ids (0 .. param) -- confirm that is intended.
# The selection depends only on the id, so no rank record is fetched; the
# upper bound is capped at the number of programs in the database.
overall_list = list(range(0, min(param + 1, count)))

# print() with a single argument behaves the same on Python 2 and Python 3.
print(overall_list)
from database import Firebase

firebase = Firebase()
count = firebase.get_program_count()

# Upper bound for a program's 'overall' living cost.
param = 30000

# Ids of the programs whose 'overall' living cost is at most `param`.
overall_list = [
    pid for pid in range(0, count)
    if firebase.get_program_living(pid)['overall'] <= param
]

# print() with a single argument behaves the same on Python 2 and Python 3.
print(overall_list)
class Filters(object):
    """Earlier variant of the program filters.

    Every ``filter_*`` method returns the list of program ids whose record,
    read through ``self.firebase``, satisfies the comparison against
    ``param``. Unless noted, ``param`` is compared as given (no conversion).
    """

    def __init__(self):
        backend = Firebase()
        self.firebase = backend
        self.count = backend.get_program_count()

    def _matching(self, fetch, accept):
        """Collect ids in ``0 .. count - 1`` whose fetched record is accepted."""
        hits = []
        for program_id in range(0, self.count):
            if accept(fetch(program_id)):
                hits.append(program_id)
        return hits

    # 1. Rank
    def filter_rank_overall(self, param):
        """Return the ids ``0 .. param`` inclusive."""
        chosen = []
        for program_id in range(0, param + 1):
            # The rank record is read for every id but its content is unused.
            self.firebase.get_program_rank(program_id)
            chosen.append(program_id)
        return chosen

    def filter_rank_usnews(self, param):
        """Return ids whose 'usnews' rank is at most ``param``."""
        return self._matching(self.firebase.get_program_rank,
                              lambda rank: rank['usnews'] <= param)

    def filter_rank_cwur(self, param):
        """Return ids whose 'cwur' rank is at most ``param``."""
        return self._matching(self.firebase.get_program_rank,
                              lambda rank: rank['cwur'] <= param)

    def filter_rank_forbes(self, param):
        """Return ids whose 'forbes' rank is at most ``param``."""
        return self._matching(self.firebase.get_program_rank,
                              lambda rank: rank['forbes'] <= param)

    def filter_rank_times(self, param):
        """Return ids whose 'times' rank is at most ``param``."""
        return self._matching(self.firebase.get_program_rank,
                              lambda rank: rank['times'] <= param)

    # 2. Location
    # Example record: {u'city': u'Philadelphia', u'region_id': 2,
    #                  u'zip': 19104, u'lon': -75.193618, u'state': u'PA',
    #                  u'lat': 39.951002, u'region_name': u'Mid East'}
    def filter_location_state(self, param):
        """Return ids whose 'state' equals ``param``."""
        return self._matching(self.firebase.get_program_location,
                              lambda place: place['state'] == param)

    def filter_location_city(self, param):
        """Return ids whose 'city' equals ``param``."""
        return self._matching(self.firebase.get_program_location,
                              lambda place: place['city'] == param)

    def filter_location_zip(self, param):
        """Return ids whose 'zip' equals ``param``."""
        return self._matching(self.firebase.get_program_location,
                              lambda place: place['zip'] == param)

    def filter_location_region(self, param):
        """Return ids whose 'region_name' equals ``param``."""
        return self._matching(self.firebase.get_program_location,
                              lambda place: place['region_name'] == param)

    # 3. Fees
    # Example record: {u'out_of_state': 63000, u'in_state': 42690}
    def filter_fees_in_state(self, param):
        """Return ids whose 'in_state' fees are at most ``param``."""
        return self._matching(self.firebase.get_program_fees,
                              lambda fees: fees['in_state'] <= param)

    def filter_fees_out_state(self, param):
        """Return ids whose 'out_of_state' fees are at most ``param``.

        Both sides of the comparison are converted with ``int``.
        """
        return self._matching(
            self.firebase.get_program_fees,
            lambda fees: int(fees['out_of_state']) <= int(param))

    # 4. Score
    # Example record: {u'gre': {u'quant': 167, u'verbal': 159}, u'gpa': 3}
    def filter_gpa(self, param):
        """Return ids whose 'gpa' is at most ``param``."""
        return self._matching(self.firebase.get_program_acad,
                              lambda scores: scores['gpa'] <= param)

    def filter_gre_verbal(self, param):
        """Return ids having a 'gre' entry whose 'verbal' is at most ``param``."""
        return self._matching(
            self.firebase.get_program_acad,
            lambda scores: 'gre' in scores
            and scores['gre']['verbal'] <= param)

    def filter_gre_quant(self, param):
        """Return ids having a 'gre' entry whose 'quant' is at most ``param``."""
        return self._matching(
            self.firebase.get_program_acad,
            lambda scores: 'gre' in scores
            and scores['gre']['quant'] <= param)

    # 5. Living
    # Example record: {u'boarding': 14601, u'books': 1425,
    #                  u'overall': 19351, u'other': 3325}
    def filter_boarding(self, param):
        """Return ids whose 'boarding' cost is at most ``param``."""
        return self._matching(self.firebase.get_program_living,
                              lambda costs: costs['boarding'] <= param)

    def filter_books(self, param):
        """Return ids whose 'books' cost is at most ``param``."""
        return self._matching(self.firebase.get_program_living,
                              lambda costs: costs['books'] <= param)

    def filter_overall_expenses(self, param):
        """Return ids whose 'overall' living cost is at most ``param``."""
        return self._matching(self.firebase.get_program_living,
                              lambda costs: costs['overall'] <= param)

    # 6. Admission Rate
    def filter_admission_rate(self, param):
        """Return ids whose admission rate is at least ``param``."""
        return self._matching(self.firebase.get_program_admission_rate,
                              lambda rate: rate >= param)


# NOTE(review): the source text had an unpaired ''' delimiter right after this
# class; its opening delimiter is not visible here, so it was left out to keep
# this block well-formed -- confirm against the full file.
#!/usr/bin/env python """data.gradscout.co Flask REST API v1""" from flask import Flask, jsonify, abort, make_response, request, url_for from database import Firebase app = Flask(__name__) firebase = Firebase() @app.route('/api/programs/<int:program_id>', methods=['GET']) def get_detailed_program(program_id): """Get the Detailed Program Details for a given Program ID.""" return jsonify(firebase.get_detailed_program(program_id)) @app.route('/api/programs/', methods=['GET']) def get_all_programs(): """Get complete data for quicker cached access. Returns: programs (json): JSON Data of Complete Program Details """ return jsonify(firebase.data) @app.route('/api/programs/count/', methods=['GET']) def get_program_count(): """Get the total count of programs available in the database. Returns:
"""Module for cleaning courses and extracting metadata.""" import re import json import math import pprint from nltk import pos_tag from database import Firebase from nltk.corpus import stopwords from nltk.tokenize import word_tokenize from nltk.stem import PorterStemmer, WordNetLemmatizer from sklearn.feature_extraction.text import CountVectorizer stemmer = PorterStemmer() lemmatiser = WordNetLemmatizer() firebase = Firebase() STOPWORDS = set(stopwords.words('english')) class GetData(object): """Get the Input Data.""" def __init__(self): """Initializing with the data files.""" cur_data_file = ("/Users/bagursreenivasamurth/Dev/gradscout/" "data/data_for_database/firebase_export.json") course_data_file = ( "/Users/bagursreenivasamurth/Dev/gradscout/data/curriculum/cur_data.json") self.cur_json_data = self._read_json_data(cur_data_file)