def create_paths(self):
    # Initialize each origin's distance vector, then do a first
    # exchange with the immediate neighbors.
    for origin in range(0, self.network.size):
        self.shortest_paths.append(self.init_shortest_path(origin))
    for origin in range(0, self.network.size):
        self.shortest_paths[origin] = self.ask_neighbor(
            origin, self.shortest_paths[origin])
    # Keep querying neighbors until no distance is left at INF,
    # asking via the farthest currently-known node each round.
    for origin in range(0, self.network.size):
        k = []
        for i in self.shortest_paths[origin]:
            k.append(i)
        while self.INF in self.shortest_paths[origin]:
            self.shortest_paths[origin] = self.ask_neighbor(
                origin, self.shortest_paths[origin], utils.get_max(k))
            k[utils.get_max(k)] = self.INF
            all_inf = True
            for i in k:
                if i < self.INF:
                    all_inf = False
            # All candidates exhausted: reseed k from the latest
            # distance vector so unreached nodes can be retried.
            if all_inf:
                k = []
                for i in self.shortest_paths[origin]:
                    k.append(i)
    # Aggregate run statistics.
    for i in self.num_transmissions:
        self.avg_num_transmissions += i
    self.avg_num_transmissions /= len(self.num_transmissions)
    count = 0
    for i in self.shortest_paths:
        for j in i:
            count += 1
            self.avg_shortest_paths += j
    self.avg_shortest_paths /= count
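# A hypothetical sketch of the utils.get_max helper that create_paths leans
# on. The loop indexes with its return value (k[utils.get_max(k)] = self.INF),
# so it presumably returns the position of the largest entry; this is an
# assumption, not the repo's actual implementation.
def get_max(values):
    best = 0
    for i in range(1, len(values)):
        if values[i] > values[best]:
            best = i
    return best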
def rhinge_loss(self, X, targets):
    """Returns the reverse (targeted) hinge loss of points in X and
    their targets."""
    preds = np.matmul(X, self.weights.T) + self.bias
    res = []
    for i in xrange(len(X)):
        target = targets[i]
        # Compare the i-th prediction row (the original compared the
        # whole matrix, a bug for batches of more than one point).
        if np.argmax(preds[i]) != target:
            max_ix, max_val = get_max(preds[i], target)
            loss = max_val - preds[i][target]
        else:
            loss = 0
        res.append(loss)
    return res
def untargeted_loss(self, X, Y):
    """Computes the untargeted hinge loss of (X, Y)."""
    preds = np.matmul(X, self.weights.T) + self.bias
    n = len(X)
    loss = []
    for i in xrange(n):
        y = Y[i]
        if np.argmax(preds[i]) != y:
            # Already misclassified: nothing left to minimize.
            res = 0
        else:
            # Margin between the true class and the best other class.
            max_val = get_max(preds[i], y)[1]
            y_val = preds[i][y]
            res = y_val - max_val
        loss.append(res)
    return np.array(loss)
def gradient_untargeted(self, X, Y):
    """Computes gradients of the untargeted hinge loss of (X, Y)."""
    preds = np.matmul(X, self.weights.T) + self.bias
    n = len(X)
    gradient = []
    for i in xrange(n):
        y = Y[i]
        if np.argmax(preds[i]) != y:
            # Zero gradient once the point is misclassified.
            res = np.zeros(self.dim)
        else:
            max_ix = get_max(preds[i], y)[0]
            w_max = self.weights[max_ix]
            w_y = self.weights[y]
            res = w_y - w_max
        gradient.append(res)
    return np.array(gradient)
def gradient(self, X, targets): """ returns gradient of the reverse (targeted) hinge loss """ preds = np.matmul(X, self.weights.T) + self.bias n = X.shape[0] gradient = [] for i in xrange(n): target = targets[i] others = range(self.num_classes) del others[target] if np.argmax(preds[i]) == target: res = np.zeros(self.dim) else: max_ix = get_max(preds[i], target)[0] w_max = self.weights[max_ix] w_target = self.weights[target] res = w_max - w_target gradient.append(res) return np.array(gradient)
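# A minimal, self-contained sketch (not from this repo) of how the targeted
# loss/gradient pair above can drive a perturbation: descending the reverse
# hinge loss moves each point toward its target class. The toy weights,
# biases, and the inlined best-non-target computation are assumptions
# standing in for the class attributes and the get_max helper used above.
import numpy as np

rng = np.random.RandomState(0)
num_classes, dim, n = 3, 5, 4
W = rng.randn(num_classes, dim)
b = rng.randn(num_classes)
X = rng.randn(n, dim)
targets = rng.randint(num_classes, size=n)

step = 0.1
for _ in xrange(100):
    preds = np.matmul(X, W.T) + b
    for i in xrange(n):
        t = targets[i]
        if np.argmax(preds[i]) != t:
            # d(rhinge)/dx_i = w_max - w_target, matching gradient() above.
            masked = preds[i].copy()
            masked[t] = -np.inf
            max_ix = int(np.argmax(masked))
            X[i] -= step * (W[max_ix] - W[t])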
import json
import re

import requests
from bs4 import BeautifulSoup


def get_data():
    base_url = "http://www.uprb.edu"
    url = "http://www.uprb.edu/es/academico/registro/horarioacad/horarioacad.htm"
    html = requests.get(url).text
    soup = BeautifulSoup(html, "html.parser")
    periods = []
    results = soup.find(id="Menu Secundario").find_all("td")
    # Periods
    for e in results[1:]:
        anchor = e.find("a")
        if anchor:
            period = {}
            title = re.sub(r"\s+", " ", anchor.text.replace("\r\n", ""))
            period["name"] = title.split("-")[0].strip()
            period["term"] = title.split("-")[1].strip()
            period["url"] = url.replace(url.split("/")[-1], "") + anchor["href"]
            periods.append(period)
    # Departments
    for period in periods:
        html = requests.get(period["url"]).text
        soup = BeautifulSoup(html, "html.parser")
        departments = []
        url = period["url"]
        for e in soup.find_all("table")[3].find_all("a"):
            department = {}
            department["name"] = re.sub(r"\s+", " ", e.text.replace("\r\n", ""))
            # Absolute links hang off the site root; relative links hang
            # off the period page's directory.
            if e["href"].startswith("/"):
                department["url"] = base_url + e["href"]
            else:
                department["url"] = url.replace(url[url.rindex("/")::], "/") + e["href"]
            departments.append(department)
        period["departments"] = departments
    # Courses
    for period in periods:
        print "\n" + period["name"]
        print "===================="
        for department in period["departments"]:
            courses = []
            dep_url = department["url"]
            html = requests.get(dep_url).text
            soup = BeautifulSoup(html, "html.parser")
            # The schedule itself lives one hop away, inside an iframe.
            anchor = soup.find("iframe").find("a")["href"]
            url = dep_url.replace(dep_url[dep_url.rindex("/")::], "/") + anchor
            html = requests.get(url).text
            soup = BeautifulSoup(html, "html.parser")
            print department["name"]
            if soup.find("body"):
                period["year"] = re.search(r"\d{4}-\d{4}", soup.find("h4").font.text).group(0)
                # Course headers are the <b> tags; pull them out of the tree
                # so the remaining <pre> text splits cleanly per course.
                titles = soup.find("pre").font.find_all("b")
                for e in soup.find("pre").font.find_all("b"):
                    e.extract()
                for e in soup.find("pre").font.find_all("hr"):
                    e.replaceWithChildren()
                idx = 0
                for course in re.split(r"\r\n\r\n", soup.find("pre").font.text)[1:]:
                    course_obj = {}
                    header = re.sub(r"\s+", " ", titles[idx].text.replace("\r\n", ""))
                    course_obj["id"] = re.search(r"[A-Z]{4}-\d{4}-.{3}", header).group(0)
                    header = re.sub(r"[A-Z]{4}-\d{4}-[A-Z].{3}", "", header)
                    course_obj["credits"] = re.search(r"\d+\.\d-?(\d+\.\d)?", header).group(0)
                    header = re.sub(r"\d\.\d", "", header)
                    course_obj["type"] = re.sub(
                        r"SS", "", re.search(r"\s(LEC|LAB|INT|PRA|SEM)\sSS", header).group(0)
                    ).strip()
                    header = header[: re.finditer(r"\s(LEC|LAB|INT|PRA|SEM)\sSS", header).next().start(0)]
                    course_obj["title"] = header.strip()
                    ################## ALL ROWS ######################
                    time_periods = []
                    notes = []
                    reserves = []
                    last = ""
                    for row in course.split("\r\n"):
                        row = row.strip()
                        # check freshmen
                        if re.search(r"FRESHMEN", row):
                            course_obj["freshmen"] = True
                        # check unex
                        if re.search(r"UNEX", row):
                            course_obj["unex"] = True
                        if re.search(r"HOR\s\d", row):
                            last = "HOR"
                            time_periods.append(get_time_period(row))
                        elif re.search(r"INSTRUCTOR", row):
                            last = "INSTRUCTOR"
                            course_obj["instructor"] = get_instructor(row)
                        elif re.search(r"NOTA\s\d", row):
                            last = "NOTA"
                            notes.append(get_note(row))
                        elif re.search(r"NO\sACEPTARA", row):
                            last = "NO ACEPTARA"
                            course_obj["no-aceptara"] = get_no_aceptara(row)
                        elif re.search(r"RESERVADO\(S\)", row):
                            last = "RESERVADO"
                            reserves.append(get_reserve(row))
                        elif re.search(r"MAXIMO", row):
                            last = "MAXIMO"
                            course_obj["max"] = get_max(row)
                        elif re.search(r"CO-REQUISITOS", row):
                            last = "CO-REQUISITOS"
                            course_obj["co-requisitos"] = get_co_requisitos(row)
                        elif last == "PRE-REQUISITOS" or re.search(r"PRE-REQUISITOS", row):
                            # Pre-requisites may wrap onto several rows; keep
                            # extending while the previous row started them.
                            if last == "PRE-REQUISITOS":
                                course_obj["pre-requisitos"].extend(get_pre_requisitos(row, True))
                            else:
                                course_obj["pre-requisitos"] = get_pre_requisitos(row, False)
                            last = "PRE-REQUISITOS"
                    # Defaults for fields that never appeared in the rows.
                    if "freshmen" not in course_obj:
                        course_obj["freshmen"] = False
                    if "unex" not in course_obj:
                        course_obj["unex"] = False
                    if "co-requisitos" not in course_obj:
                        course_obj["co-requisitos"] = []
                    if "pre-requisitos" not in course_obj:
                        course_obj["pre-requisitos"] = []
                    if "max" not in course_obj:
                        course_obj["max"] = None
                    # Add all arrays to the course object
                    course_obj["time_periods"] = time_periods
                    course_obj["notes"] = notes
                    course_obj["reserves"] = reserves
                    # clean up on pre-requisitos: after the second id-0 entry,
                    # renumber the tail so ids stay sequential.
                    start_idx = 0
                    zero_count = 0
                    for i, e in enumerate(course_obj["pre-requisitos"]):
                        if e["id"] == 0:
                            zero_count = zero_count + 1
                        if zero_count == 2:
                            start_idx = i
                            break
                    if start_idx > 0:
                        while start_idx < len(course_obj["pre-requisitos"]):
                            previous_id = course_obj["pre-requisitos"][start_idx - 1]["id"]
                            course_obj["pre-requisitos"][start_idx]["id"] = previous_id + 1
                            start_idx = start_idx + 1
                    # Split the trailing "(Y"/"(O" connective off each course name.
                    for e in course_obj["pre-requisitos"]:
                        y_or_o = re.search(r"\(.", e["course"]).group(0)[1:]
                        e["y_or_o"] = y_or_o
                        e["course"] = re.sub(r"\(.", "", e["course"])
                    courses.append(course_obj)
                    idx = idx + 1
                department["courses"] = courses
            else:
                department["courses"] = []
    with open("data.json", "w") as outfile:
        json.dump(periods, outfile)
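# A minimal usage sketch, not part of the original module: run the scraper
# and read back data.json, walking the structure built above. This assumes
# the helper functions referenced in get_data (get_time_period,
# get_instructor, get_note, etc.) are defined elsewhere in this module.
if __name__ == "__main__":
    get_data()
    with open("data.json") as infile:
        periods = json.load(infile)
    for period in periods:
        for department in period["departments"]:
            for course in department["courses"]:
                print course["id"], course["title"]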