def get_summary(self):
    res = []
    path_dir = self.mars_config.get_base_path()
    snap_times_list = os.listdir(path_dir)
    snap_times_list.sort(reverse=True)
    for snap_time in snap_times_list:
        try:
            # Each snapshot directory is named with an epoch timestamp.
            time_stamp = int(snap_time)
            time_str = time.strftime('%Y-%m-%d %H:%M:%S',
                                     time.localtime(time_stamp))
            cur_summary = {
                'time': time_str,
                GROUPS_NAME: len(get_group(self.mars_config, snap_time)),
                DEVICE_NAME: len(get_devices(self.mars_config, snap_time)),
                DEVICE_CONFIG_NAME:
                    len(get_devices_configs(self.mars_config, snap_time)),
                HOSTS_NAME: len(get_host(self.mars_config, snap_time)),
                LINKS_NAME: len(get_link(self.mars_config, snap_time)),
                FLOW_NAME: len(get_flow(self.mars_config, snap_time)),
            }
            res.append(cur_summary)
        except ValueError:
            # Skip directory entries whose names are not timestamps.
            pass
    return res
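# Expected snapshot layout, inferred from the parsing above (an assumption,
# not documented in this file): each subdirectory of the base path is named
# with an epoch timestamp, e.g.
#   <base_path>/1609459200/  ->  summarised as '2021-01-01 00:00:00' (UTC)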
def getData_imdb(path):
    soup = utils.get_link(path)
    # Initialise every field up front; genre is a list throughout so the
    # output type is consistent even when the lookup below fails.
    genre = []
    title = ""
    cast_list = {}
    rating = ""
    resume = ""
    try:
        # Page titles look like "Name (Year) - IMDb"; keep only the name.
        title = soup.title.text.strip()
        title = title.split("(")[0].strip()
    except AttributeError:
        pass
    try:
        # IMDb rates out of 10; scale to a 0-100 score.
        rating = soup.find("span", {"itemprop": "ratingValue"}).text
        rating = str(float(rating) * 10)
    except AttributeError:
        pass
    try:
        cast_table = soup.find("table", {"class": "cast_list"}).find_all(
            "span", {"class": "itemprop"})
        actors = []
        for item in cast_table:
            actors.append(item.text)
        characters = []
        character_list = soup.find("table", {"class": "cast_list"}).find_all(
            "td", {"class": "character"})
        for item in character_list:
            characters.append(item.a.text)
        cast_list.update({"actor": actors, "character": characters})
    except AttributeError:
        pass
    try:
        resume = soup.find("span", {"itemprop": "description"}).text
    except AttributeError:
        pass
    try:
        genres = soup.find("div", {"itemprop": "genre"}).find_all("a")
        for item in genres:
            genre.append(item.text.strip())
    except AttributeError:
        pass
    all_text = soup.findAll(text=True)
    page_text = " ".join(filter(utils.visible, all_text))
    data = {}
    data['title'] = title.strip()
    data['resume'] = resume.strip()
    data['rate'] = rating.strip()
    data['genre'] = genre
    data['cast'] = cast_list
    data['site_data'] = page_text
    path = "extractor/imdb"
    fileName = title
    utils.writeToJson(fileName, path, data)
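# ---------------------------------------------------------------------------
# The scrapers in this file all lean on a small `utils` module that is not
# shown here. A minimal sketch of what it presumably provides follows; the
# exact signatures and parser choice are assumptions, not the project's
# verified API.
import json
import os

import requests
from bs4 import BeautifulSoup
from bs4.element import Comment


def get_link(url):
    # Fetch a page and return it parsed; raise_for_status() triggers the
    # requests.exceptions.HTTPError that the callers catch.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def visible(element):
    # Filter used with findAll(text=True): keep only human-visible text nodes.
    if element.parent.name in ('style', 'script', 'head', 'title', 'meta'):
        return False
    if isinstance(element, Comment):
        return False
    return True


def writeToJson(file_name, path, data):
    # Persist one scraped record as <path>/<file_name>.json.
    os.makedirs(path, exist_ok=True)
    with open(os.path.join(path, str(file_name) + '.json'), 'w') as out:
        json.dump(data, out, ensure_ascii=False)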
def get_data(site, count):
    try:
        soup = utils.get_link(site)
        genre = ""
        title = ""
        cast_list = {}
        rating = ""
        resume = ""
        try:
            title = soup.title.text.strip()
            title = title.split("-")[0].strip()
        except AttributeError:
            pass
        try:
            resume = soup.find(
                "div",
                {"class": "tvobject-masthead-wrapper content-wrapper"}).find(
                    "div",
                    {"class": "tvobject-masthead-description"}).text.strip()
        except AttributeError:
            pass
        try:
            rating = soup.find(
                "li", {"class": "tvobject-overview-about-line"}).text
        except AttributeError:
            pass
        try:
            cast = soup.find("div", {"data-section-id": "cast"}).find(
                "div", {"class": "row"}).find_all("div")
            cast_list = []
            for item in cast:
                cast_list.append(item.text.strip())
        except AttributeError:
            pass
        all_text = soup.findAll(text=True)
        page_text = " ".join(filter(utils.visible, all_text))
        data = {}
        data['link'] = site
        data['title'] = title.strip()
        data['resume'] = resume.strip()
        data['rate'] = rating.strip()
        data['cast'] = cast_list
        data['site_data'] = page_text
        path = "extractor/tvguide"
        fileName = count
        utils.writeToJson(fileName, path, data)
    except ConnectionError:
        pass
    except requests.exceptions.HTTPError:
        pass
def get_data(site, count):
    try:
        soup = utils.get_link(site)
        # Initialise every field up front so a failed lookup below cannot
        # leave a name undefined when the record is assembled (in the
        # original, cast_list was only created inside the resume block).
        title = ""
        rating = ""
        genre = ""
        resume = ""
        cast_list = []
        try:
            title = soup.find(
                "div", {
                    "class": "col-md-10 col-md-offset-2 col-sm-9 "
                             "col-sm-offset-3 mobile-title"
                }).text
        except AttributeError:
            pass
        try:
            rating = soup.find("div", {"class": "rating"}).text
        except AttributeError:
            pass
        try:
            # The genre row reads "Genres<values>"; strip the label.
            genre = str.replace(
                soup.find("label", text="Genres").parent.text, "Genres", "")
        except AttributeError:
            pass
        try:
            resume = soup.find("div", {"itemprop": "description"}).text
        except AttributeError:
            pass
        try:
            list_actors = soup.find_all("li", {"itemprop": "actor"})
            for item in list_actors:
                name = item.find("h4", itemprop="name").text
                character = item.find("h4", {"class": "character"}).text
                cast_list.append([name, character])
        except AttributeError:
            pass
        all_text = soup.findAll(text=True)
        page_text = " ".join(filter(utils.visible, all_text))
        data = {}
        data['link'] = site
        data['title'] = title.strip()
        data['resume'] = resume.strip()
        data['rate'] = rating.strip()
        data['genre'] = genre
        data['cast'] = cast_list
        data['site_data'] = page_text
        path = "extractor/tracktv"
        fileName = count
        utils.writeToJson(fileName, path, data)
    except ConnectionError:
        pass
    except requests.exceptions.HTTPError:
        pass
def init_snap_data(self, snap_time_str):
    print('Start to trace the ' +
          UseStyle(snap_time_str, fore='green', mode='underline') +
          ' snap data\n')
    snap_time = format_time_string_2_number(snap_time_str)
    self.snap_time = snap_time
    self.device_config_obj = DeviceConfigs.initialize_with(
        self.mars_config, get_devices_configs(self.mars_config, snap_time))
    self.hosts_obj = Hosts.initialize_with(
        self.mars_config, get_host(self.mars_config, snap_time))
    self.group_obj = Groups.initialize_with(
        self.mars_config, get_group(self.mars_config, snap_time))
    self.flow_obj = Flows.initialize_with(
        self.mars_config, get_flow(self.mars_config, snap_time))
    self.link_obj = Links.initialize_with(
        self.mars_config, get_link(self.mars_config, snap_time))
def dump_employees(filepath):
    with open(filepath, 'r') as input_csv:
        reader = csv.reader(input_csv)
        next(reader)  # skip the header row
        for line in reader:
            name = line[1]
            city = line[2]
            cuisine_style = line[3]
            ranking = line[4]
            rating = line[5]
            price_range = line[6]
            number_of_reviews = line[7]
            reviews = line[8]
            # As in the original, the menu, menu link and address all read
            # the same column (index 9).
            menu = line[9]
            menu_link = line[9]
            address = line[9]
            add_value(name, city, cuisine_style, ranking, rating,
                      price_range, number_of_reviews, reviews, menu,
                      get_link(menu_link), get_address(address))
def show_snap_links(self, snap_time_str):
    snap_time = format_time_string_2_number(snap_time_str)
    print_normal('Show the ' +
                 UseStyle(snap_time_str, fore='green', mode='underline') +
                 ' links.')
    link_obj = Links.initialize_with(
        self.mars_config, get_link(self.mars_config, snap_time))
    devices_config_obj = DeviceConfigs.initialize_with(
        self.mars_config, get_devices_configs(self.mars_config, snap_time))
    for device_config in devices_config_obj.get_data():
        print_normal_start('Device Name : ' + device_config['name'],
                           color='yellow')
        for link in link_obj.get_data():
            if link['src']['device'] == device_config['id']:
                print_normal_center(
                    link_to_line_string(link, devices_config_obj))
        print_normal_end('')
    print_normal('')
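# Hypothetical usage of the snapshot helpers above. The owning class and the
# config constructor are assumptions made for illustration; only the method
# names come from this file:
#
#   inspector = SnapInspector(MarsConfig('/var/snapshots'))
#   for entry in inspector.get_summary():
#       print(entry['time'], entry)
#   inspector.show_snap_links('2021-01-01 00:00:00')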
def get_max(mapList):
    # Return the key with the largest value ("" if mapList is empty).
    maxKey = ""
    maxValue = 0
    for key in mapList:
        if maxValue < mapList[key]:
            maxValue = mapList[key]
            maxKey = key
    return maxKey


# Use a forward slash: the original 'extractor\sites.txt' relied on an
# invalid escape sequence. The with block also closes the file, so the
# original redundant f.close() is dropped.
with open('extractor/sites.txt') as f:
    lines = f.readlines()

for site in lines:
    soup = utils.get_link(site)
    removed = 0
    print(clean_page(soup, 0))
    ranked_element = {}
    cast_temp = []
    aux = {}
    cast = get_cast(soup)
    ranking = []
    title = soup.title.text
    resume = get_sinopses(soup.html.find_all(text=True), 0)
    print(title)
    print(clean_text(resume[-1][0]))
    print(cast)
    # bodyArray = body.text.strip()
    # print(bodyArray)
def get_data(site, count):
    try:
        soup = utils.get_link(site)
        genre = []
        title = ""
        rate = ""
        resume = None
        actor_list = []
        character_list = []
        try:
            title = soup.find("div", {"class": "title"}).text.strip()
            title = title.split("(")[0].strip()
        except AttributeError:
            pass
        try:
            resume = soup.find("div", {"class": "overview"}).text
        except AttributeError:
            pass
        try:
            # Cast entries render as "Actor\nCharacter"; split them apart.
            cast = soup.find("ol", {"class": "people scroller"}).find_all("li")
            cast_l = []
            for item in cast:
                cast_l.append(item.text.strip().split("\n"))
            for item in cast_l:
                actor_list.append(item[0])
                if len(item) == 2:
                    character_list.append(item[1])
        except AttributeError:
            pass
        try:
            genres = soup.find("section", {
                "class": "genres right_column"
            }).find_all("ul")
            for item in genres:
                gr = item.text.replace("\n", " ").strip().split(" ")
                for i in gr:
                    if len(i) > 2:
                        genre.append(i)
        except AttributeError:
            pass
        try:
            # find() returns None when the chart is absent, which raises
            # TypeError (not AttributeError) on subscripting; a missing
            # attribute raises KeyError. Catch all three.
            rate = soup.find("div", class_="user_score_chart")['data-percent']
        except (AttributeError, TypeError, KeyError):
            rate = None
        all_text = soup.findAll(text=True)
        page_text = " ".join(filter(utils.visible, all_text))
        data = {}
        data['link'] = site
        data['title'] = title.strip()
        # resume may be None when the overview block is missing.
        data['resume'] = resume.strip() if resume else ""
        data['rate'] = rate
        data['genre'] = genre
        data['cast'] = actor_list
        data['character'] = character_list
        data['site_data'] = page_text
        path = "extractor/tvmovidDB"
        fileName = count
        utils.writeToJson(fileName, path, data)
    except ConnectionError:
        pass
    except requests.exceptions.HTTPError:
        pass
def get_data(site, count):
    try:
        soup = utils.get_link(site)
        # Initialise every field, including resume, which the original left
        # undefined (a NameError) whenever the synopsis lookup failed.
        genre = ""
        title = ""
        cast_list = {}
        rate = ""
        resume = ""
        try:
            title = soup.title.text.strip()
            title = title.split("-")[0].strip()
        except AttributeError:
            pass
        try:
            resume = soup.find("div", {"id": "movieSynopsis"}).text.strip()
        except AttributeError:
            pass
        try:
            genre = soup.find(
                "td", text="Genre:").parent.text.split("\n")[2].strip()
        except AttributeError:
            pass
        try:
            cast = soup.find_all(
                "div", {"class": "cast-item media inlineBlock "})
            actors_list = []
            characters_list = []
            for item in cast:
                actor = item.find("div").find("a").text.strip()
                actors_list.append(actor)
                # Character credits render as "as <name>"; drop the prefix.
                character = str.replace(
                    item.find("span", {
                        "class": "characters subtle smaller"
                    }).text, "as ", "")
                characters_list.append(character)
            cast_list.update({
                "actor": actors_list,
                "character": characters_list
            })
        except AttributeError:
            pass
        try:
            rate = soup.find("span", {
                "class": "meter-value superPageFontColor"
            }).span.text.strip()
        except AttributeError:
            pass
        data = {}
        data['link'] = site
        data['title'] = title
        data['resume'] = resume
        data['rate'] = rate
        data['genre'] = genre
        data['cast'] = cast_list
        page_text = utils.text_from_html(soup)
        data['site_data'] = page_text
        path = 'extractor/rottentomatoes'
        fileName = count
        utils.writeToJson(fileName, path, data)
    except ConnectionError:
        pass
    except requests.exceptions.HTTPError:
        pass
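# A minimal driver for the per-site scrapers above, mirroring the pattern of
# the newline-separated URL list read in the get_max block. The file name and
# the __main__ placement are assumptions for illustration.
if __name__ == '__main__':
    with open('extractor/sites.txt') as sites_file:
        for count, site in enumerate(sites_file):
            # get_data swallows connection and HTTP errors, so one bad URL
            # does not stop the run.
            get_data(site.strip(), count)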