Example #1
    def get_summary(self):
        """Return per-snapshot object counts, newest snapshot first."""
        res = []
        path_dir = self.mars_config.get_base_path()
        # Each snapshot lives in a directory named by its Unix timestamp.
        snap_times_list = os.listdir(path_dir)
        snap_times_list.sort(reverse=True)

        for snap_time in snap_times_list:
            try:
                time_stamp = int(snap_time)
                time_str = time.strftime('%Y-%m-%d %H:%M:%S',
                                         time.localtime(time_stamp))
                cur_summary = {
                    'time': time_str,
                    GROUPS_NAME: len(get_group(self.mars_config, snap_time)),
                    DEVICE_NAME: len(get_devices(self.mars_config, snap_time)),
                    DEVICE_CONFIG_NAME: len(
                        get_devices_configs(self.mars_config, snap_time)),
                    HOSTS_NAME: len(get_host(self.mars_config, snap_time)),
                    LINKS_NAME: len(get_link(self.mars_config, snap_time)),
                    FLOW_NAME: len(get_flow(self.mars_config, snap_time)),
                }
                res.append(cur_summary)
            except ValueError:
                # Skip directory entries whose names are not numeric timestamps.
                pass

        return res
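
The method assumes one directory per snapshot, named by its Unix timestamp. A short, self-contained sketch of that layout and of the strftime call above (the base path and timestamps are illustrative assumptions, not project values):

import os
import time

# Build a toy snapshot tree like the one get_summary() walks.
base = '/tmp/mars_snaps'
for ts in (1577836800, 1577840400):
    os.makedirs(os.path.join(base, str(ts)), exist_ok=True)

for name in sorted(os.listdir(base), reverse=True):
    print(name, time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(name))))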
Example #2
import utils


def getData_imdb(path):
    """Scrape title, rating, cast, synopsis, and genres from an IMDb page."""
    soup = utils.get_link(path)
    genre = []  # a list, to match the genres branch below
    title = ""
    cast_list = {}
    rating = ""
    resume = ""

    try:
        title = soup.title.text.strip()
        title = title.split("(")[0].strip()
    except AttributeError:
        pass
    try:
        rating = soup.find("span", {"itemprop": "ratingValue"}).text
        rating = str(float(rating) * 10)
    except AttributeError:
        pass
    try:
        cast_table = soup.find("table", {"class": "cast_list"}).find_all(
            "span", {"class": "itemprop"})
        actors = []
        for item in cast_table:
            actors.append(item.text)
        characters = []
        character_list = soup.find("table", {"class": "cast_list"}).find_all(
            "td", {"class": "character"})
        for item in character_list:
            characters.append(item.a.text)
        cast_list.update({"actor": actors, "character": characters})
    except AttributeError:
        pass

    try:
        resume = soup.find("span", {"itemprop": "description"}).text
    except AttributeError:
        pass
    try:
        genres = soup.find("div", {"itemprop": "genre"}).find_all("a")
        genre = []
        for item in genres:
            genre.append(item.text.strip())
    except AttributeError:
        pass

    all_text = soup.find_all(string=True)
    page_text = " ".join(filter(utils.visible, all_text))
    data = {}
    data['title'] = title.strip()
    data['resume'] = resume.strip()
    data['rate'] = rating.strip()
    data['genre'] = genre
    data['cast'] = cast_list
    data['site_data'] = page_text
    path = "extractor/imdb"
    fileName = title
    utils.writeToJson(fileName, path, data)
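
Examples #2 through #10 lean on a local utils module that the listing never shows. Below is a minimal stand-in inferred from the call sites; every signature and body here is an assumption, not the project's actual code:

import json
import os

import requests
from bs4 import BeautifulSoup
from bs4.element import Comment


def get_link(url):
    # Fetch a page and return its parsed soup. raise_for_status() raises
    # requests.exceptions.HTTPError, matching the except clauses in the
    # scrapers above.
    resp = requests.get(url)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, 'html.parser')


def visible(element):
    # Keep only human-visible text nodes when filtering find_all(string=True).
    if element.parent.name in ('style', 'script', 'head', 'title', 'meta'):
        return False
    return not isinstance(element, Comment)


def text_from_html(soup):
    # Convenience wrapper used in Example #10.
    return " ".join(filter(visible, soup.find_all(string=True)))


def writeToJson(fileName, path, data):
    # Write data to <path>/<fileName>.json, creating the directory if needed.
    os.makedirs(path, exist_ok=True)
    with open(os.path.join(path, '{}.json'.format(fileName)), 'w') as fh:
        json.dump(data, fh, ensure_ascii=False)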
Example #3
import requests

import utils


def get_data(site, count):
    """Scrape title, synopsis, rating, and cast from a TV Guide page."""
    try:
        soup = utils.get_link(site)
        title = ""
        cast_list = {}
        rating = ""
        resume = ""
        try:
            title = soup.title.text.strip()
            title = title.split("-")[0].strip()
        except AttributeError:
            pass
        try:
            resume = soup.find(
                "div", {"class": "tvobject-masthead-wrapper content-wrapper"}
            ).find("div", {"class": "tvobject-masthead-description"}).text.strip()
        except AttributeError:
            pass
        try:
            rating = soup.find(
                "li", {"class": "tvobject-overview-about-line"}).text
        except AttributeError:
            pass
        try:
            cast = soup.find("div", {"data-section-id": "cast"}).find(
                "div", {"class": "row"}).find_all("div")
            cast_list = []
            for item in cast:
                cast_list.append(item.text.strip())
        except AttributeError:
            pass

        # Collect the visible page text and persist everything as JSON; this
        # must run whether or not the cast lookup succeeded.
        all_text = soup.find_all(string=True)
        page_text = " ".join(filter(utils.visible, all_text))
        data = {}
        data['link'] = site
        data['title'] = title.strip()
        data['resume'] = resume.strip()
        data['rate'] = rating.strip()
        data['cast'] = cast_list
        data['site_data'] = page_text
        path = "extractor/tvguide"
        fileName = count
        utils.writeToJson(fileName, path, data)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.HTTPError):
        # Skip sites that fail to load; assumes utils.get_link() surfaces
        # requests' exceptions.
        pass
Example #4
import requests

import utils


def get_data(site, count):
    """Scrape title, rating, genre, synopsis, and cast from a movie page."""
    try:
        soup = utils.get_link(site)
        # Defaults, so a failed lookup below cannot leave a name unbound.
        title = ""
        rating = ""
        genre = ""
        resume = ""
        cast_list = []
        try:
            title = soup.find(
                "div", {"class": "col-md-10 col-md-offset-2 col-sm-9 "
                        "col-sm-offset-3 mobile-title"}).text
        except AttributeError:
            pass
        try:
            rating = soup.find("div", {"class": "rating"}).text
        except AttributeError:
            pass
        try:
            genre = soup.find(
                "label", text="Genres").parent.text.replace("Genres", "")
        except AttributeError:
            pass
        try:
            resume = soup.find("div", {"itemprop": "description"}).text
        except AttributeError:
            pass
        try:
            list_actors = soup.find_all("li", {"itemprop": "actor"})
            for item in list_actors:
                name = item.find("h4", itemprop="name").text
                character = item.find("h4", {"class": "character"}).text
                cast_list.append([name, character])
        except AttributeError:
            pass

        # Collect the visible page text and persist everything as JSON; this
        # must run regardless of which lookups above succeeded.
        all_text = soup.find_all(string=True)
        page_text = " ".join(filter(utils.visible, all_text))
        data = {}
        data['link'] = site
        data['title'] = title.strip()
        data['resume'] = resume.strip()
        data['rate'] = rating.strip()
        data['genre'] = genre
        data['cast'] = cast_list
        data['site_data'] = page_text
        path = "extractor/tracktv"
        fileName = count
        utils.writeToJson(fileName, path, data)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.HTTPError):
        # Skip sites that fail to load; assumes utils.get_link() surfaces
        # requests' exceptions.
        pass
Example #5
    def init_snap_data(self, snap_time_str):
        """Load every object type for the snapshot taken at snap_time_str."""
        print('Start to trace the ' +
              UseStyle(snap_time_str, fore='green', mode='underline') +
              ' snap data\n')
        snap_time = format_time_string_2_number(snap_time_str)
        self.snap_time = snap_time
        self.device_config_obj = DeviceConfigs.initialize_with(
            self.mars_config, get_devices_configs(self.mars_config, snap_time))
        self.hosts_obj = Hosts.initialize_with(
            self.mars_config, get_host(self.mars_config, snap_time))
        self.group_obj = Groups.initialize_with(
            self.mars_config, get_group(self.mars_config, snap_time))
        self.flow_obj = Flows.initialize_with(
            self.mars_config, get_flow(self.mars_config, snap_time))
        self.link_obj = Links.initialize_with(
            self.mars_config, get_link(self.mars_config, snap_time))
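
The method converts a display string back into the epoch-seconds directory name seen in Example #1. A guess at format_time_string_2_number, mirroring the strftime format from that example (the implementation is an assumption):

import time


def format_time_string_2_number(time_str):
    # Assumed inverse of the strftime('%Y-%m-%d %H:%M:%S', ...) call in
    # Example #1; returns the snapshot's Unix timestamp.
    return int(time.mktime(time.strptime(time_str, '%Y-%m-%d %H:%M:%S')))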
Example #6
import csv


def dump_employees(filepath):
    """Read a restaurant CSV row by row and hand each row to add_value().

    add_value, get_link, and get_address are helpers defined elsewhere in
    the same module.
    """
    with open(filepath, 'r') as input_csv:
        reader = csv.reader(input_csv)
        next(reader)  # skip the header row
        for line in reader:
            Name = line[1]
            City = line[2]
            Cuisine_style = line[3]
            Ranking = line[4]
            Rating = line[5]
            Price_range = line[6]
            Number_of_reviews = line[7]
            Reviews = line[8]
            # Note: menu, menu_link, and address all read column 9 of the row.
            menu = line[9]
            menu_link = line[9]
            address = line[9]
            add_value(Name, City, Cuisine_style, Ranking, Rating, Price_range,
                      Number_of_reviews, Reviews, menu, get_link(menu_link),
                      get_address(address))
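
A one-line driver for the function above, assuming a CSV with the column layout it reads (the file path is hypothetical):

dump_employees('data/restaurants.csv')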
Example #7
    def show_snap_links(self, snap_time_str):
        """Print, for each device, every link whose source is that device."""
        snap_time = format_time_string_2_number(snap_time_str)
        print_normal('Show the ' +
                     UseStyle(snap_time_str, fore='green', mode='underline') +
                     ' links.')

        link_obj = Links.initialize_with(self.mars_config,
                                         get_link(self.mars_config, snap_time))
        devices_config_obj = DeviceConfigs.initialize_with(
            self.mars_config, get_devices_configs(self.mars_config, snap_time))

        for device_config in devices_config_obj.get_data():
            print_normal_start('Device Name : ' + device_config['name'],
                               color='yellow')
            for link in link_obj.get_data():
                if link['src']['device'] == device_config['id']:
                    print_normal_center(
                        link_to_line_string(link, devices_config_obj))
            print_normal_end('')
            print_normal('')
Example #8
import utils


def get_max(mapList):
    """Return the key with the largest value, or "" if no value exceeds 0."""
    maxKey = ""
    maxValue = 0
    for key in mapList:
        if maxValue < mapList[key]:
            maxValue = mapList[key]
            maxKey = key
    return maxKey
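
get_max is a hand-rolled arg-max over a dict. For comparison, a near-equivalent built on max() with a key function (the function name is mine; note the edge case in the comment):

def get_max_builtin(mapList):
    # Unlike get_max() above, which only returns a key whose value exceeds
    # 0, this returns the arg-max even when every value is <= 0.
    return max(mapList, key=mapList.get, default="")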


with open('extractor/sites.txt') as f:
    lines = f.readlines()

for site in lines:
    soup = utils.get_link(site)
    removed = 0
    # clean_page, get_cast, get_sinopses, and clean_text are helpers defined
    # elsewhere in this module.
    print(clean_page(soup, 0))
    ranked_element = {}
    cast_temp = []
    aux = {}
    cast = get_cast(soup)
    ranking = []
    title = soup.title.text
    resume = get_sinopses(soup.html.find_all(text=True), 0)
    print(title)
    print(clean_text(resume[-1][0]))
    print(cast)

    # bodyArray = body.text.strip()
    # print(bodyArray)
Example #9
import requests

import utils


def get_data(site, count):
    """Scrape title, synopsis, cast, genres, and user score from a movie page."""
    try:
        soup = utils.get_link(site)
        genre = []
        title = ""
        rate = ""
        try:
            title = soup.find("div", {"class": "title"}).text.strip()
            title = title.split("(")[0].strip()
        except AttributeError:
            pass
        try:
            resume = soup.find("div", {"class": "overview"}).text
        except AttributeError:
            resume = ""  # keep a string so resume.strip() below cannot fail
        try:
            cast = soup.find("ol", {"class": "people scroller"}).find_all("li")
            cast_l = []
            actor_list = []
            character_list = []
            for item in cast:
                cast_l.append(item.text.strip().split("\n"))
            for item in cast_l:
                actor_list.append(item[0])
                if len(item) == 2:
                    character_list.append(item[1])
        except AttributeError:
            actor_list = []
            character_list = []
        try:
            genres = soup.find(
                "section", {"class": "genres right_column"}).find_all("ul")
            for item in genres:
                gr = item.text.replace("\n", " ").strip().split(" ")
                for i in gr:
                    if len(i) > 2:
                        genre.append(i)
        except AttributeError:
            pass
        try:
            rate = soup.find("div", class_="user_score_chart")['data-percent']
        except (AttributeError, TypeError, KeyError):
            # find() may return None (TypeError on subscript) or the tag may
            # lack the attribute (KeyError).
            rate = None
        all_text = soup.find_all(string=True)
        page_text = " ".join(filter(utils.visible, all_text))
        data = {}
        data['link'] = site
        data['title'] = title.strip()
        data['resume'] = resume.strip()
        data['rate'] = rate
        data['genre'] = genre
        data['cast'] = actor_list
        data['character'] = character_list
        data['site_data'] = page_text
        path = "extractor/tvmovidDB"
        fileName = count
        utils.writeToJson(fileName, path, data)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.HTTPError):
        # Skip sites that fail to load; assumes utils.get_link() surfaces
        # requests' exceptions.
        pass
Example #10
import requests

import utils


def get_data(site, count):
    """Scrape title, synopsis, genre, cast, and score from a Rotten Tomatoes page."""
    try:
        soup = utils.get_link(site)
        genre = ""
        title = ""
        cast_list = {}
        rate = ""
        resume = ""  # default, so data['resume'] below cannot hit an unbound name
        try:
            title = soup.title.text.strip()
            title = title.split("-")[0].strip()
        except AttributeError:
            pass
        try:
            resume = soup.find("div", {"id": "movieSynopsis"}).text.strip()
        except AttributeError:
            pass
        try:
            genre = soup.find(
                "td", text="Genre:").parent.text.split("\n")[2].strip()
        except AttributeError:
            pass
        try:
            cast = soup.find_all("div",
                                 {"class": "cast-item media inlineBlock "})
            actors_list = []
            characters_list = []
            for item in cast:
                actor = item.find("div").find("a").text.strip()
                actors_list.append(actor)
                character = item.find("span", {
                    "class": "characters subtle smaller"
                }).text.replace("as ", "")
                characters_list.append(character)
            cast_list.update({
                "actor": actors_list,
                "character": characters_list
            })
        except AttributeError:
            pass
        try:
            rate = soup.find("span", {
                "class": "meter-value superPageFontColor"
            }).span.text.strip()
        except AttributeError:
            pass
        data = {}
        data['link'] = site
        data['title'] = title
        data['resume'] = resume
        data['rate'] = rate
        data['genre'] = genre
        data['cast'] = cast_list
        page_text = utils.text_from_html(soup)
        data['site_data'] = page_text
        path = 'extractor/rottentomatoes'
        fileName = count
        utils.writeToJson(fileName, path, data)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.HTTPError):
        # Skip sites that fail to load; assumes utils.get_link() surfaces
        # requests' exceptions.
        pass
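
The four get_data variants (Examples #3, #4, #9, and #10) share the same (site, count) signature. A sketch of the kind of driver that could feed any of them, modeled on the sites.txt read in Example #8 (the file name and one-URL-per-line layout are assumptions):

with open('extractor/sites.txt') as f:
    sites = [line.strip() for line in f if line.strip()]

for count, site in enumerate(sites):
    get_data(site, count)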