Example #1
3
def TimeCode(resp):
    string = BeautifulSoup(str(resp))
    string = unicode(string.find('b'))
    #print string
    Day =''
    for i in range(2,8):
        if u'Thứ '+str(i) in string:
            Day = str(i)
    if u'Chủ nhật' in string:
        Day = str(8)

    string = string.split(u'tiết ')[1]
    string = string.split(' (LT)')[0]
    #string = 1,2 / 1,2,3 / 3,4,5 / 4,5 ...
    NumOfPeriod = str((len(string)+1)/2)
    #NumOfPeriod = 2/3/4
    Period = str(string[0])
    # first period of the class
    return Day+Period+NumOfPeriod
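A minimal usage sketch, assuming Python 2 with BeautifulSoup 3 in a UTF-8 source file that declares # -*- coding: utf-8 -*- at the top; the timetable cell below is invented:

resp = '<b>Thứ 2, tiết 1,2,3 (LT)</b>'   # invented timetable cell
print TimeCode(resp)                     # -> '213' (day 2, first period 1, 3 periods)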
Example #2
1
def countWords(c) :
    c.execute("SELECT title, content FROM listing")

    listings = c.fetchall()
    for title, content in listings :
        title = title + r' '
        all_content = BeautifulSoup(title + content).getText()
        all_content = all_content.replace('\n', ' ')
        all_content = re.sub(r'START CLTAGS.*END CLTAGS', '', all_content)
        tokens = set(token.lower() for token in all_content.split())
        tokens -= STOP_WORDS
        for word in tokens  :
            c.execute("INSERT INTO words (word, counter) "
                      "VALUES (%s, 1) "
                      "ON DUPLICATE KEY UPDATE counter = counter + 1", (word.lower(),))
Example #3
    def parse_item(self, response):

        item = CpuItem()
        item['link'] = response.url
        item['name'] = response.css('span.cpuname').xpath('text()').extract()[0]

        search = {
            'description': u'Description:',
            'other_name': u'Other names:',
            'g3d_mark': u'G3DMark/\$Price:',
            'clock': u'Clockspeed:',
            'core': u'No of Cores:'
        }

        rank = u'Samples:'

        i = 0
        for sel in response.css("table.desc tr")[1].xpath('td'):
            text = sel.extract()
            textSplit = text.split(u'<span style="font-weight: bold;">')
            for ii in textSplit:
                cleantext = BeautifulSoup(ii).text
                for si in search:
                    if (cleantext.find(search[si]) != -1 ):
                        item[si] = cleantext.replace(search[si],"")
                if (cleantext.find(rank) != -1):
                    item['rank'] = cleantext.split(rank)[0]

            i = i+1
        yield item
Example #4
0
    def parse_item(self, response):

        item = GpuItem()
        item['link'] = response.url
        item['name'] = response.css('span.cpuname').xpath(
            'text()').extract()[0]

        search = {
            'description': u'Description:',
            #'processzor_modell': u'Videocard Category:',
            'other_name': u'Other names:',
            #'memoria_merete': u'Videocard First Benchmarked:',
            'g3d_mark': u'G3DMark/\$Price:',
            #'memoria_max_seb':u'Overall Rank:',
            #'memoria_foglalat':u'Last Price Change:',
        }

        rank = u'Samples:'

        i = 0
        for sel in response.css("table.desc tr")[1].xpath('td'):
            text = sel.extract()
            textSplit = text.split(u'<span style="font-weight: bold;">')
            for ii in textSplit:
                cleantext = BeautifulSoup(ii).text
                for si in search:
                    if (cleantext.find(search[si]) != -1):
                        item[si] = cleantext.replace(search[si], "")
                if (cleantext.find(rank) != -1):
                    item['rank'] = cleantext.split(rank)[0]

            i = i + 1
        yield item
Example #5
def cleanhtml(raw):
    cleanr = re.compile('<.*?>|\\n')
    raw = re.sub(cleanr, ' ', raw)
    raw = BeautifulSoup(raw).getText()
    raw = raw.replace(u"\u2018",
                      "'").replace(u"\u2019",
                                   "'").replace(u"\u201c",
                                                '"').replace(u"\u201d", '"')
    raw = raw.replace(' .', '.').replace(' ,', ',')
    raw = ' '.join(raw.split())
    return raw
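A minimal usage sketch (Python 2, BeautifulSoup 3); the input markup is invented:

print cleanhtml(u'<p>Hello ,\n \u201cworld\u201d .</p>')   # -> Hello, "world".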
Example #6
0
def convertIMGBase64(text):
    tags = re.findall("<img.*?>", text)
    for tag in tags:
        filename = BeautifulSoup(tag).findAll("img")[0]['src']
        filetype = filename.split(".")[1]
        file = open("./{}".format(filename), "rb")  # read the raw image bytes
        data = file.read()
        file.close()
        data = data.encode("base64")
        new_tag = '<img src="data:image/{};base64,{}">'.format(filetype, data)
        text = text.replace(tag, new_tag)
    return text
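A minimal usage sketch; logo.png is an invented image file assumed to sit in the working directory:

html = '<p><img src="logo.png"></p>'
print convertIMGBase64(html)   # the img tag now carries an inline data: URI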
Example #7
def format_zagaz_station_2013(block_station):
  href_string = BeautifulSoup(block_station[0][0])\
                  .find('a', {'href' : re.compile('station.php\?id_s*')})['href']
  id_station = re.search('station.php\?id_s=([0-9]*)', href_string).group(1)
  highway =  BeautifulSoup(block_station[0][0])\
                  .find('a', {'href' : re.compile('autoroute.php\?id_a*')})
  if highway:
    highway = highway['title']
  brand_and_name_station = BeautifulSoup(block_station[0][0])('strong')[0].string
  brand_station = brand_and_name_station.split('&nbsp;-&nbsp;')[0]
  name_station = brand_and_name_station.split('&nbsp;-&nbsp;')[1]
  street_station = BeautifulSoup(block_station[0][1]).findAll(text=True)
  street_station = [str_correct_html(elt).strip() for elt in street_station\
                      if str_correct_html(elt).strip()]
  zip_station, city_station = None, None
  if len(block_station[0]) > 2:
    zip_station = BeautifulSoup(block_station[0][2]).p.find(text=True).strip() # maybe fragile
    city_station =  str_correct_html(BeautifulSoup(block_station[0][2])\
                      .find('a', {'href' : re.compile('prix-carburant.php*')}).string)
  if block_station[1]:
    comment_station = BeautifulSoup(block_station[1][0]).find('div', {'class' : 'station_comm'}).string
  else:
    comment_station = None
  latitude = re.search('Latitude: ([0-9.]*)', block_station[2][0])
  longitude = re.search('longitude: (-?[0-9.]*)', block_station[2][0])
  if latitude and longitude:
    gps_station = (latitude.group(1), longitude.group(1), block_station[2][1])
  else:
    gps_station = (None, None, None)
  ls_zagaz_station = [id_station,
                      brand_station,
                      name_station,
                      comment_station,
                      street_station,
                      zip_station,
                      city_station,
                      gps_station,
                      highway]
  return ls_zagaz_station
Example #8
def feed_entry_description_terms(description):
    # Get text only with beautifulsoup
    text = BeautifulSoup(description).getText()
    # Get words by splitting text with whitespace
    words = text.split()
    
    # Remove words that contain no alphanumeric characters
    # Reference : http://stackoverflow.com/questions/1276764/stripping-everything-but-alphanumeric-chars-from-a-string-in-python
    words = [word for word in words if re.sub(r'\W+', '', word)]
    
    # Convert to lowercase
    words = [word.lower() for word in words]
    
    return words
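A minimal usage sketch; the feed description is invented:

print feed_entry_description_terms(u'<p>Breaking: BeautifulSoup 3 is legacy</p>')
# -> [u'breaking:', u'beautifulsoup', u'3', u'is', u'legacy']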
Example #9
def format_zagaz_station(block_station):
  href_string = BeautifulSoup(block_station[0][0])\
                  .find('a', {'href' : re.compile('station.php\?id_s*')})['href']
  id_station = re.search('station.php\?id_s=([0-9]*)', href_string).group(1)
  highway =  BeautifulSoup(block_station[0][0])\
                  .find('a', {'href' : re.compile('autoroute.php\?id_a*')})
  if highway:
    highway = highway['title']
  brand_and_name_station = BeautifulSoup(block_station[0][0])('strong')[0].string
  # check if other ('strong') with highway?
  brand_station = brand_and_name_station.split('&nbsp;-&nbsp;')[0]
  name_station = brand_and_name_station.split('&nbsp;-&nbsp;')[1]
  street_station = str_correct_html(BeautifulSoup(block_station[0][1])('p')[0].string)
  zip_station = BeautifulSoup(block_station[0][2])('p')[0].contents[0].strip()
  city_station = str_correct_html(BeautifulSoup(block_station[0][2])\
                  .find('a', {'href' : re.compile('prix-carburant.php*')}).string)
  if block_station[1]:
    comment_station = BeautifulSoup(block_station[1][0]).find('div',
                                                              {'class' : 'station_comm'}).string
  else:
    comment_station = None
  latitude = re.search('Latitude: ([0-9.]*)', block_station[2][0])
  longitude = re.search('longitude: (-?[0-9.]*)', block_station[2][0])
  if latitude and longitude:
    gps_station = (latitude.group(1), longitude.group(1), block_station[2][1])
  else:
    gps_station = (None, None, None)
  ls_zagaz_station = [id_station,
                      brand_station,
                      name_station,
                      comment_station,
                      street_station,
                      zip_station,
                      city_station,
                      gps_station,
                      highway]
  return ls_zagaz_station
Example #10
0
def baggify(content, c) :

    content_text = BeautifulSoup(content).getText()
    content_text = re.sub(r'START CLTAGS.*END CLTAGS', '', content_text)
    tokens = [token.lower() for token in content_text.split()]

    content_bag = []
    for token in tokens :
        if token in STOP_WORDS :
            continue
        c.execute("SELECT counter FROM words where word = %s", (token,))
        count = c.fetchone()
        if count and count[0] > 10 :
            content_bag.append(token)

    return r' '.join(content_bag)
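A minimal usage sketch; the MySQLdb connection details are invented, and the words table plus STOP_WORDS set from the countWords example above are assumed to exist:

import MySQLdb
conn = MySQLdb.connect(host='localhost', user='scraper', passwd='secret', db='listings')
print baggify('<p>Spacious sunny apartment near downtown</p>', conn.cursor())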
Example #11
0
def parse_pages():
	blogfile = open("blogatog_page_1.html", "r")
	soup = BeautifulSoup(blogfile.read())
	menu = soup.findAll('div',attrs={'class':'post-content'})
	# some hackery to extract the number of pages to iterate through
	num_pages = int(soup.findAll('span',attrs={'class':'page-number'})[0].getText().split("of")[1].strip())
	print(str(num_pages) + "pages")
	outfile = open("_maybe_data.xml", "w", 0)
	convos = etree.Element("Conversations")
	question_counter = 0
    # parse pages till end
	for i in range(1,9000):
		blogfilename = "blogatog_page_" + str(i) + ".html"
		if not os.path.isfile(blogfilename):
			print ("no existy")
			break
		blogfile = open(blogfilename, "r")
		soup = BeautifulSoup(blogfile.read())
		# print(str(i))
		blogtxt = open(blogfilename, "r").read()
		page_soup = BeautifulSoup(blogtxt)
		menu = soup.findAll('div',attrs={'class':'post-content'})
		for subMenu in menu:
			if len(str(subMenu).split("</b></p><p>")) < 2:
				continue
            # hackery to split the question and answer
			q = str(subMenu).split("</b></p><p>")[0].split("asked: ")[1]
			a = str(subMenu).split("</b></p><p>")[1].replace("</p>\n</div>", "")
			a = html_escape(a)
            # regex to look for anything that looks like "maybe :)"
			if re.search("aybe.*:.*\)", a):
				q = BeautifulSoup(q).getText()
				a = BeautifulSoup(a.split("<div class=\"tags\">")[0]).getText()
				print q
				print a
				print("\n")
				# print("f")
				convo = etree.SubElement(convos, "Conversation")
				convo.set("id", str(question_counter))
				question_counter += 1
				etree.SubElement(convo, "Question").text = q
				etree.SubElement(convo, "Answer").text = a
	xmlout = etree.tostring(convos, pretty_print=True)
	outfile.write(xmlout)
Example #12
def listProd():
    """ From the Leroy Merlin website, returns a list of plywood products with their characteristics in JSON format """
    listProduits = []
    url = "http://www.leroymerlin.fr/v3/search/search.do?pageTemplate=Recherche&resultOffset=0&resultLimit=100&resultListShape=SEARCHENGINE_PRODUCT_LIST_PLAIN&facet=PRODUCT&keyword=contre+plaqu%C3%A9&sort=TRI_PAR_PRIX_CROISSANT_ID&intuitiontodo=newSearchAllSite"
    data = urllib.urlopen(url).read()
    soup = BeautifulSoup(data)
    soup = soup.prettify()
    lines = soup.split("\n")
    for i in range(len(lines)):
        line = lines[i]
        chaine_a_chercher = "prd-infos"
        if chaine_a_chercher in line:  
            produit = {}
            prodTot = lines[i+4]
            description = prodTot.split(",")[0]
            descriptionList = description.split(" ")
            typ = descriptionList[7]
            if len(descriptionList)>9:
                materiau = " ".join(descriptionList[9:])
            dimension = prodTot.split(",")[1]
            dimension = dimension.replace(" ", "")
            dimension = dimension.replace("L", "")
            dimension = dimension.replace("l", "")
            dimension = dimension.replace(".", "")
            longueur = float(dimension.split("x")[0])
            largeur = float(dimension.split("x")[1])
            epaisseur = prodTot.split(",")[2].replace("epais. ", "")
            epaisseur = epaisseur.replace("mm", "")
            surface = (largeur * longueur) / 1000
        if "price-wrapper" in line:
            prix = float(lines[i+6].replace("&euro;", ""))
            prixSurface = round(prix /surface, 2)
            produit["typ"] = typ
            produit["materiau"] = materiau
            produit["longueur"] = longueur
            produit["largeur"] = largeur
            produit["surface"] = surface
            produit["epaisseur"] = epaisseur
            produit["prix"] = prix
            produit["prixSurface"] = prixSurface
            listProduits.append(produit)
    return(listProduits)
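A minimal usage sketch; it assumes the hard-coded Leroy Merlin search URL above is still reachable:

import json
print json.dumps(listProd()[:2], indent=2)   # dump the first two plywood entries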
Example #13
0
def convertIMGBase64(text):
    tags = re.findall("<img.*?>", text)
    for tag in tags:
        filename = BeautifulSoup(tag).findAll("img")[0]['src']
        filetype = filename.split(".")[1]
        img = Image.open("./"+filename)
        orig_height = img.size[1]
        orig_width = img.size[0]
        if orig_width > 550:
            percent = 550 / float(orig_width)
            height = int(float(orig_height) * float(percent))
            img = img.resize((550, height), PIL.Image.ANTIALIAS)
            img.save(filename)  # Image.save() returns None, so don't rebind img
        file = open("./{}".format(filename), "rb")  # read the raw image bytes
        data = file.read()
        file.close()
        data = data.encode("base64")
        new_tag = '<img src="data:image/{};base64,{}">'.format(filetype, data)
        text = text.replace(tag, new_tag)
    return text
Example #14
    def get_school_and_rank(self, outfile):

        rows = self.soup.findAll('tr', {'valign': 'top'})
        for row in rows:
            rank = re.findall(self.numbers_regex, \
                                   row.find('span').renderContents())
            current_school = row.find('a', {
                'class': 'school-name'
            }).contents[0]
            current_school = BeautifulSoup(
                current_school, convertEntities=BeautifulSoup.HTML_ENTITIES)
            current_school = ''.join([
                i if ord(i) < 128 else ' ' for i in current_school.contents[0]
            ])
            current_school = ' '.join(
                [word.lower() for word in current_school.split(' ')])
            print current_school + ' ' + str(rank)

            json.dump(dict(zip([current_school], rank)), outfile)
            outfile.write('\n')
Example #15
def convertIMGBase64(text):
    tags = re.findall("<img.*?>", text)
    for tag in tags:
        filename = BeautifulSoup(tag).findAll("img")[0]['src']
        filetype = filename.split(".")[1]
        img = Image.open("./"+filename)
        orig_height = img.size[1]
        orig_width = img.size[0]
        if orig_width > 550:
            percent = 550 / float(orig_width)
            height = int(float(orig_height) * float(percent))
            img = img.resize((550, height), PIL.Image.ANTIALIAS)
            img.save(filename)  # Image.save() returns None, so don't rebind img
        file = open("./{}".format(filename), "rb")  # read the raw image bytes
        data = file.read()
        file.close()
        data = data.encode("base64")
        new_tag = '<img src="data:image/{};base64,{}">'.format(filetype, data)
        text = text.replace(tag, new_tag)
    return text
Example #16
0
def scrape(start_page=1):
    global query, max_pages, data_dir
    for n in xrange(start_page, max_pages):
        dirname = os.path.join(data_dir,str(n))
        if os.path.exists(dirname):
            continue
        print "Retrieving page set: ", n, "...",
        os.makedirs(dirname)
        url = query.replace('page=1','page=%d' % n)
        data = json.load(urlopen(url))
        i = 1
        for x in data['response']['results']:
            fields = x['fields']
            headline = fields['headline'].replace('\n','')
            raw_body = fields['body']
            body = BeautifulSoup(raw_body,smartQuotesTo=None).getText('\n')
            body_cleaned = body.split('\n\n\n\n\n')[0]
            save(dirname, i, headline, body_cleaned)
            i += 1
        print 'done.'
        sleep(1)
Example #17
0
    def update_one(self, show_ccn, link, start_time, duration):
        duration = timedelta(minutes=duration)

        # Load detail show page and check if the latest data is updated
        page = urllib2.urlopen(link)
        soup = BeautifulSoup(page)
        update_time = datetime.strptime(soup.em.string.replace('-', ''),
                                        DATE_FORMAT_B)
        if not self.is_latest(show_ccn, update_time):
            # Get H1 tag that includes title data and imdb link data
            h1 = soup.h1.a
            title = h1.string
            # Convert the normal IMDb link to its mobile version
            imdb_mobilelink = "http://m.imdb.com/title/tt"
            imdb_tt_number = h1['href'][27:]
            imdb_link = imdb_mobilelink + (
                '0' * (7 - len(imdb_tt_number))) + imdb_tt_number

            # Get CSV representation of episode data from TVRage
            csv_link = self.find_csv_link(soup)
            csv_page = urllib2.urlopen(csv_link)

            # Convert CSV string to list of episode
            csv_string = BeautifulSoup(csv_page).pre.string.replace('\r\n', '')
            csv_ep_list = csv_string.split('\n')[1:]
            ep_list = []

            # Examine each episode to classify
            for csv_ep in csv_ep_list:
                ep_info_split = csv_ep.split(',')
                ep_info = []
                for ep_info_item in ep_info_split:
                    if len(ep_info_item) > 0:
                        if (ep_info_item[0] == '"') and (ep_info_item[-1]
                                                         == '"'):
                            ep_info_item = ep_info_item[1:-1]
                    ep_info.append(ep_info_item)
                idx, season, num, pcode, airdate = ep_info[:5]
                ep_title = ','.join(ep_info[5:-2])
                if ep_info[-1] == 'n':
                    is_special = False
                else:
                    is_special = True

                if idx:
                    if idx.isdigit():
                        idx = int(idx)
                    else:
                        self.logger.error(
                            'Index "%s" must be a number - %s for %s' %
                            (idx, str(csv_ep), title))
                else:
                    if is_special:
                        idx = 0
                    else:
                        self.logger.error(
                            'Only special episodes can have an empty index - %s for %s'
                            % (str(csv_ep), title))
                        idx = -1

                if season.isdigit():
                    season = int(season)
                else:
                    self.logger.error(
                        'Season "%s" must be a number - %s for %s' %
                        (season, str(csv_ep), title))
                    season = -1

                if num.isdigit():
                    num = int(num)
                else:
                    if is_special != True:
                        self.logger.error(
                            'Episode number "%s" must be a number - %s for %s'
                            % (num, str(csv_ep), title))
                        num = -1
                    else:
                        num = 0

                airdate = str(airdate.replace('"', ''))
                date_fmt = re.compile(
                    '\d\d/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/\d\d'
                )
                if airdate == 'UNKNOWN':
                    air_dt_tuple = [9999, 12, 31, 23, 59]
                elif date_fmt.match(airdate):
                    # Combine timeslot and date to make datetime object and convert timezone to kst
                    kst_air_dt = self.dc.convert_tz('kst',
                                                    (str(airdate), start_time))
                    kst_end_dt = kst_air_dt + duration
                    air_dt_tuple = self.dc.datetime_to_tuple(kst_air_dt)

                    ep_ccn = show_ccn + ep_title + str(idx)
                    if kst_air_dt > self.now:
                        status = 'yet'
                        self.next.update(
                            {'ccn': ep_ccn},
                            {'$set': {
                                'title': title,
                                'ad': air_dt_tuple
                            }})
                    elif kst_end_dt < self.now:
                        status = 'aired'
                    else:
                        status = 'airing'
                elif airdate == 'UNAIRED':
                    air_dt_tuple = [0000, 1, 1, 0, 0]
                else:
                    air_dt_tuple = [-1, -1, -1, -1, -1]

                # Build up episode information list.
                ep = [
                    idx, season, num, pcode, air_dt_tuple, ep_title, is_special
                ]
                ep_list.append(ep)

            # Update detail information about the show
            self.shows.update({'ccn': show_ccn},
                              {'$set': {
                                  'episodes': ep_list
                              }})
            self.update_timestamp(show_ccn, update_time)

            # Upsert show into index
            self.index.update({'ccn': show_ccn},
                              {'$set': {
                                  'title': title,
                                  'imdb_m': imdb_link
                              }},
                              upsert=True)
            self.temp_index.append([show_ccn, title, imdb_link])
            self.logger.info('Episodes Updated for ' + show_ccn +
                             ' titled with ' + title)
        else:
            # Data is the latest, so no need to update
            self.logger.info('Episodes for ' + show_ccn + ' is latest.')
Example #18
0
    def decode_title(self, title):
        new_title = BeautifulSoup(title.encode('utf-8'),
                                  convertEntities="html").prettify()
        if '\n' in new_title:
            new_title = new_title.split('\n')[0]
        return new_title
Example #19
0
def lake_union_weather():
    url_to_use = 'https://lakeunionweather.info'
    page = requests.get(url_to_use)
    soup = BeautifulSoup(page.content)
    try:
        header_data = soup.findAll("div", {"id": "Header"})[0]
        atmosphere_data = soup.findAll("table", {"id": "WeatherTable"})[0]
        water_data = soup.findAll("table", {"id": "WaterTable"})[0]
    except IndexError:
        pass

    # First lets get the date and time out of this
    info_date = None
    date_data = header_data.findAll('h4')[0]
    date_data = BeautifulSoup("{}".format(date_data)).getText()
    date_string = date_data.split('recorded on')[1].strip()
    info_date = datetime.strptime(date_string, "%d %b %Y %I:%M %p")

    # This is a gross way to get all the data from the table, but so it goes
    air_temp_f = None
    wind_chill_f = None
    avg_windspeed_dir = None
    avg_windspeed_mph = None
    for tr in BeautifulSoup("{}".format(atmosphere_data)).findAll('tr')[1:]:
        ths = BeautifulSoup("{}".format(tr.findAll('th')[0])).getText()
        tds = BeautifulSoup("{}".format(tr.findAll('td')[0])).getText()
        if ths.find('Temperature') >= 0:
            air_temp_f = float(tds.split('&#176;F')[0])
        elif ths.find('Wind Chill') >= 0:
            wind_chill_f = float(tds.split('&#176;F')[0])
        elif ths.find('Av. Windspeed') >= 0:
            avg_windspeed_mph = float(tds.split('MPH')[0].strip())
            avg_windspeed_dir = tds.split('from the')[1].strip()

        # This is a gross way to get all the data from the table, but so it goes
    water_temp_f = None
    for tr in BeautifulSoup("{}".format(water_data)).findAll('tr')[1:]:
        tds = tr.findAll('td')
        if float(BeautifulSoup("{}".format(tds[0])).getText()) < 5:
            water_temp_f = float(BeautifulSoup("{}".format(tds[1])).getText())

    # Now let's find the time diff when we got this
    if info_date is None:
        time_string = ""
    else:
        # Need to make this aware of the time zone
        tz = pytz.timezone('US/Pacific')
        latest_date_tz = tz.localize(info_date)
        time_diff = datetime.now(tz) - latest_date_tz
        # time_diff = datetime.now() - latest_date_water_temp
        if time_diff.days > 0:
            hours_diff = time_diff.days * 24
            hours_diff += time_diff.seconds / 60 / 60
        else:
            hours_diff = time_diff.seconds / 60 / 60
        time_string = " about {} hours ago".format(hours_diff)

    if air_temp_f is None and water_temp_f is None:
        # This means we didn't find anything
        retval = "I'm sorry, I couldn't find any recent data about the weather on lake union"
    else:
        retval = "Last known conditions on lake union include: "
        num_values = 0
        if water_temp_f is not None:
            retval += "Water temperature of {:.0f} degrees fahrenheit".format(
                round(water_temp_f))
            num_values += 1
        if air_temp_f is not None:
            if num_values > 0:
                retval += ", and "
            retval += "Air temperature of {:.0f} degrees fahrenheit, ".format(
                round(air_temp_f))
            retval += "Wind chill of {:.0f} degrees fahrenheit, ".format(
                round(wind_chill_f))
            retval += "wind speed of {:.0f} miles per hour ".format(
                round(utils.mps_to_mph(avg_windspeed_mph), 1))
            retval += "coming from the {}".format(
                utils.compass_to_words(avg_windspeed_dir))
        retval += "{}".format(time_string)
    return retval
Example #20
0
    def getNewLines(self):
        if self.newlines is None:
            rawcontent = urllib2.urlopen(self.url).read()
            newcontent = BeautifulSoup(rawcontent).prettify()
            self.newlines = newcontent.split("\n")
        return self.newlines
Example #21
0
def get_links_list(url):
    response = requests.get(url, headers=headers)
    # response.status
    htm = response.text
    htm = htm.encode('iso-8859-1', 'ignore')
    htm = BeautifulSoup(htm)
    plot = htm.findAll('h1', attrs={'class': re.compile('sporttitle')})
    plot = parser.unescape(
        str(plot).replace('\t', '').replace('\n', '').replace('[', '').replace(
            ']', '').decode('utf-8').strip())
    plot = re.sub(r'<[^>]*>', r'', plot)
    htm = str(htm)

    if re.search(get_localized_string(T_ED_RESUL), htm):
        htm = re.split(get_localized_string(T_ED_RESUL), htm, 1)[1]
        htm = htm.split('<div id="comblockabs">', 1)[0]
        htm = re.sub(r'\t', r'', ''.join(htm))
        htm = re.sub(r'\n', r'', htm)
        htm = parser.unescape(htm.decode('utf-8').strip())

        query = """.+?<b>(.+?)</b>.+?"""
        elinks = re.compile(query, re.DOTALL).findall(htm)
        elinks[0] = '[COLOR lightskyblue]' + get_localized_string(
            T_ED_RESUL) + elinks[0] + '[/COLOR]'
        for el in elinks:
            image = media + '/33054.png'

            list_item = xbmcgui.ListItem(label=el)
            list_item.setArt({
                'fanart': addonID.getAddonInfo('fanart'),
                'icon': image,
                'thumb': image,
                'poster': image
            })
            list_item.setInfo('video', {'plot': plot})
            list_item.setProperty('IsPlayable', 'false')

            k_url = ''

            listing.append((k_url, list_item, False))

    elif re.search('AceStream Links', htm):
        htm = htm.split('<span class="lnkt">AceStream Links</span>', 1)[-1]
        htm = htm.split('<div id="comblockabs">', 1)[0]
        htm = re.sub(r'\t', r'', ''.join(htm))
        htm = re.sub(r'\n', r'', htm)
        htm = re.sub(r'<td width="16">', r'\n', htm)
        htm = parser.unescape(htm.decode('utf-8').strip())

        query = """<img title=".+?/linkflag/(.+?).png" />.+?class="bitrate".+?">(.+?)/td>.+?<a href="acestream:(.+?)">.+?"""
        elinks = re.compile(query, re.DOTALL).findall(htm)

        for el in elinks:
            image = media + "/flags/" + el[0] + ".gif"

            list_item = xbmcgui.ListItem(
                label='[B]Audio: ' + check_audio_lang(el[0]) +
                ', Bitrate: AceStream ' + el[1].replace('<', '') + '[/B]')
            list_item.setArt({
                'fanart': addonID.getAddonInfo('fanart'),
                'icon': image,
                'thumb': image,
                'poster': image
            })
            list_item.setInfo('video', {'plot': plot})
            list_item.setProperty('IsPlayable', 'true')

            k_url = 'plugin://program.plexus/?mode=1&url=acestream:' + el[
                2] + '&name=[B]Audio: ' + check_audio_lang(
                    el[0]) + ', Bitrate: AceStream ' + el[1].replace(
                        '<', '') + '[/B]'

            listing.append((k_url, list_item, False))

    else:
        image = media + '/33056.png'
        list_item = xbmcgui.ListItem(
            label='[I][B]' + get_localized_string(T_NO_LSTRM) + '[/B][/I]')
        list_item.setArt({
            'fanart': addonID.getAddonInfo('fanart'),
            'icon': image,
            'thumb': image,
            'poster': image
        })
        list_item.setInfo('video', {'plot': plot})
        list_item.setProperty('IsPlayable', 'false')

        k_url = ''

        listing.append((k_url, list_item, False))

    if len(listing) < 1:
        image = media + '/33057.png'
        list_item = xbmcgui.ListItem(label='¡¡¡GRFTJX!!! ¡¡¡GRMBLFJ!!!')
        list_item.setArt({
            'fanart': addonID.getAddonInfo('fanart'),
            'icon': image,
            'thumb': image,
            'poster': image
        })
        list_item.setInfo('video', {'plot': plot})
        list_item.setProperty('IsPlayable', 'false')

        k_url = ''

        listing.append((k_url, list_item, False))

    return listing
Example #22
0
# Debug Variable
debug = False

# Registers FireFox and Microsoft Edge as available browsers
ffpath = 'C:\\Program Files (x86)\\Mozilla Firefox\\firefox.exe'
webbrowser.register('firefox', None, webbrowser.BackgroundBrowser(ffpath), 1)

mepath = 'edge.bat'
webbrowser.register('edge', None, webbrowser.BackgroundBrowser(mepath), 2)

# Retrieve Array of Words to use from Long Wikipedia Article
random_words = urllib2.urlopen(
    'https://en.wikipedia.org/wiki/1918_New_Year_Honours').read()
clean_words = BeautifulSoup(random_words).text
words = []
for word in clean_words.split():
    words.append(word)
print("Length of Array of Words is " + str(len(words)))


# Function to search Bing and acquire points
def search_bing():
    word1 = random.randint(0, len(words) - 1)  # randint's upper bound is inclusive
    word2 = random.randint(0, len(words) - 1)
    word3 = random.randint(0, len(words) - 1)
    #webbrowser.open_new('https://www.bing.com/search?q='+words[word1]+'+'+words[word2]+'+'+words[word3])
    open_curr_tab('https://www.bing.com/search?q=' + unidecode(words[word1]) +
                  '+' + unidecode(words[word2]) + '+' +
                  unidecode(words[word3]))
    print("Done")
Example #23
0
    def parse_store_detail(self, response):
        hxs = HtmlXPathSelector(response)

        item = KoubeiStoreItem()
        # Url
        item['link_url'] = response.url
        match = self.city_pattern.match(response.url)
        if match:
            item['city'] = match.group(1)

        # Bread Crumb
        crumb_elems = hxs.select("//div[@class='crumb k2-fix-float']/*").extract()
        if crumb_elems:
            item['bread_crumb'] = u'\xbb'.join([ BeautifulSoup(c).text for c in crumb_elems ])

        # Name
        name_elem = hxs.select("//input[@id='store-full-name']/@value").extract()
        if name_elem:
            item['name'] = name_elem[0]

        # Address
        address_elem = hxs.select("//input[@id='store-address']/@value").extract()
        if address_elem:
            item['address'] = address_elem[0]

        # Telephone
        tel_elem = hxs.select("//input[@id='store-tel']/@value").extract()
        if tel_elem:
            item['tel'] = tel_elem[0]
        
        # Average Cost
        avg_elem = hxs.select("//div[@class='store-info-card']//li/text()").extract()
        for text in avg_elem:
            if text.startswith("人均".decode('utf-8')):
                item['avg_cost'] = text.split(u'\uff1a')[1]
                break

        # Rating
        rating_elem = hxs.select("//div[@class='store-free-title k2-fix-float']/p/b/text()").extract()
        if rating_elem:
            item['rating'] = rating_elem[0]
            item['n_rating'] = int(rating_elem[1])

        # Detail
        detail_elem = hxs.select("//div[@class='detail-main']/ul/li").extract()
        for elem in detail_elem:
            text = BeautifulSoup(elem).find('label').text
            if text.startswith('网站地址'.decode('utf-8')):
                item['url'] = text.split(u'\uff1a')[1].strip()
            if text.startswith('店铺标签'.decode('utf-8')):
                item['tag_list'] = [a.text for a in BeautifulSoup(elem).findAll('a')]
            
        # Description
        desc_elem = hxs.select("//div[@class='detail-intro']/div/text()").extract()
        if desc_elem:
            item['description'] = desc_elem[0].strip()

        # Promote
        promote_elems= hxs.select("//div[@id='promote-more']//p").extract()
        promotes = []
        for elem in promote_elems:
            name = BeautifulSoup(elem).find('a').text.strip()
            count = int(BeautifulSoup(elem).find('span').text[1:-1])
            promotes.append((name, count))
        if promotes != []:
            item['promote_list'] = promotes
            
        # Impress
        impress_elems = hxs.select("//div[@id='impress-more']//span/text()").extract()
        if impress_elems:
            item['impress_list'] = [imp.strip() for imp in impress_elems]

        #print "PARSING : %s | %s | %s | %s" % (item['name'], item['tel'], item['address'], item['avg_cost'])
        return item
Example #24
0
# This is a very simple counter to check how many characters and words there are
# in the file by simply joining together all of the <plaintext /> elements.

import re
from BeautifulSoup import BeautifulSoup

file = "output_pretty.xml"
f = open(file, 'r+')
f = f.readlines()
f = ''.join(map(str.strip,f))
f = BeautifulSoup(f)
f = f.findAll('plaintext')
g = []
for item in f:
    g.append(str(item))
f = ' '.join(g)
print len(f)
f = f.split(' ')
print len(f)
Example #25
0
    def update_one(self, show_ccn, link, start_time, duration):
        duration = timedelta(minutes = duration)
        
        # Load detail show page and check if the latest data is updated    
        page = urllib2.urlopen(link)
        soup = BeautifulSoup(page)
        update_time = datetime.strptime(soup.em.string.replace('-', ''), DATE_FORMAT_B)
        if not self.is_latest(show_ccn, update_time):
            # Get H1 tag that includes title data and imdb link data
            h1 = soup.h1.a
            title = h1.string
            # Convert the normal IMDb link to its mobile version
            imdb_mobilelink = "http://m.imdb.com/title/tt"
            imdb_tt_number = h1['href'][27:]
            imdb_link = imdb_mobilelink + ('0' * (7 - len(imdb_tt_number))) + imdb_tt_number

            # Get CSV representation of episode data from TVRage
            csv_link = self.find_csv_link(soup)
            csv_page = urllib2.urlopen(csv_link)

            # Convert CSV string to list of episode
            csv_string = BeautifulSoup(csv_page).pre.string.replace('\r\n', '')
            csv_ep_list = csv_string.split('\n')[1:]
            ep_list = []

            # Examine each episode to classify
            for csv_ep in csv_ep_list:
                ep_info_split = csv_ep.split(',')
                ep_info = []
                for ep_info_item in ep_info_split:
                    if len(ep_info_item) > 0:
                        if (ep_info_item[0] == '"') and (ep_info_item[-1] == '"'):
                            ep_info_item = ep_info_item[1:-1]
                    ep_info.append(ep_info_item)
                idx, season, num, pcode, airdate = ep_info[:5]
                ep_title = ','.join(ep_info[5:-2])
                if ep_info[-1] == 'n':
                    is_special = False
                else:
                    is_special = True

                if idx:
                    if idx.isdigit():
                        idx = int(idx)
                    else:
                        self.logger.error('Index "%s" must be a number - %s for %s' % (idx, str(csv_ep), title))
                else:
                    if is_special:
                        idx = 0
                    else:
                        self.logger.error('Only special episodes can have an empty index - %s for %s' % (str(csv_ep), title))
                        idx = -1
                
                if season.isdigit():
                    season = int(season)
                else:
                    self.logger.error('Season "%s" must be a number - %s for %s' % (season, str(csv_ep), title)) 
                    season = -1

                if num.isdigit():
                    num = int(num)
                else:
                    if is_special != True:
                        self.logger.error('Episode number "%s" must be a number - %s for %s' % (num, str(csv_ep), title))
                        num = -1
                    else:
                        num = 0

                airdate = str(airdate.replace('"', ''))
                date_fmt = re.compile('\d\d/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/\d\d')
                if airdate == 'UNKNOWN':
                    air_dt_tuple = [9999, 12, 31, 23, 59]
                elif date_fmt.match(airdate):
                    # Combine timeslot and date to make datetime object and convert timezone to kst
                    kst_air_dt = self.dc.convert_tz('kst', (str(airdate), start_time))
                    kst_end_dt = kst_air_dt + duration
                    air_dt_tuple = self.dc.datetime_to_tuple(kst_air_dt)
                    
                    ep_ccn = show_ccn + ep_title + str(idx)
                    if kst_air_dt > self.now:
                        status = 'yet'
                        self.next.update({'ccn': ep_ccn}, {'$set': {'title': title, 'ad': air_dt_tuple}}) 
                    elif kst_end_dt < self.now:
                        status = 'aired'
                    else:
                        status = 'airing'
                elif airdate == 'UNAIRED':
                    air_dt_tuple = [0000, 1, 1, 0, 0]
                else:
                    air_dt_tuple = [-1, -1, -1, -1, -1]

                # Build up episode information list. 
                ep = [idx, season, num, pcode, air_dt_tuple, ep_title, is_special]
                ep_list.append(ep)

            # Update detail information about the show
            self.shows.update({'ccn': show_ccn}, {'$set': {'episodes': ep_list}})
            self.update_timestamp(show_ccn, update_time)

            # Upsert show into index
            self.index.update({'ccn': show_ccn}, {'$set': {'title': title, 'imdb_m': imdb_link}}, upsert=True)
            self.temp_index.append([show_ccn, title, imdb_link])
            self.logger.info('Episodes Updated for ' + show_ccn + ' titled with ' + title)  
        else:
            # Data is the latest, so no need to update
            self.logger.info('Episodes for ' + show_ccn + ' is latest.')
Example #26
0
    def parse_store_detail(self, response):
        hxs = HtmlXPathSelector(response)

        item = KoubeiStoreItem()
        # Url
        item['link_url'] = response.url
        match = self.city_pattern.match(response.url)
        if match:
            item['city'] = match.group(1)

        # Bread Crumb
        crumb_elems = hxs.select(
            "//div[@class='crumb k2-fix-float']/*").extract()
        if crumb_elems:
            item['bread_crumb'] = u'\xbb'.join(
                [BeautifulSoup(c).text for c in crumb_elems])

        # Name
        name_elem = hxs.select(
            "//input[@id='store-full-name']/@value").extract()
        if name_elem:
            item['name'] = name_elem[0]

        # Address
        address_elem = hxs.select(
            "//input[@id='store-address']/@value").extract()
        if address_elem:
            item['address'] = address_elem[0]

        # Telephone
        tel_elem = hxs.select("//input[@id='store-tel']/@value").extract()
        if tel_elem:
            item['tel'] = tel_elem[0]

        # Average Cost
        avg_elem = hxs.select(
            "//div[@class='store-info-card']//li/text()").extract()
        for text in avg_elem:
            if text.startswith("人均".decode('utf-8')):
                item['avg_cost'] = text.split(u'\uff1a')[1]
                break

        # Rating
        rating_elem = hxs.select(
            "//div[@class='store-free-title k2-fix-float']/p/b/text()"
        ).extract()
        if rating_elem:
            item['rating'] = rating_elem[0]
            item['n_rating'] = int(rating_elem[1])

        # Detail
        detail_elem = hxs.select("//div[@class='detail-main']/ul/li").extract()
        for elem in detail_elem:
            text = BeautifulSoup(elem).find('label').text
            if text.startswith('网站地址'.decode('utf-8')):
                item['url'] = text.split(u'\uff1a')[1].strip()
            if text.startswith('店铺标签'.decode('utf-8')):
                item['tag_list'] = [
                    a.text for a in BeautifulSoup(elem).findAll('a')
                ]

        # Description
        desc_elem = hxs.select(
            "//div[@class='detail-intro']/div/text()").extract()
        if desc_elem:
            item['description'] = desc_elem[0].strip()

        # Promote
        promote_elems = hxs.select("//div[@id='promote-more']//p").extract()
        promotes = []
        for elem in promote_elems:
            name = BeautifulSoup(elem).find('a').text.strip()
            count = int(BeautifulSoup(elem).find('span').text[1:-1])
            promotes.append((name, count))
        if promotes != []:
            item['promote_list'] = promotes

        # Impress
        impress_elems = hxs.select(
            "//div[@id='impress-more']//span/text()").extract()
        if impress_elems:
            item['impress_list'] = [imp.strip() for imp in impress_elems]

        #print "PARSING : %s | %s | %s | %s" % (item['name'], item['tel'], item['address'], item['avg_cost'])
        return item
Example #27
0
import telnetlib
import time
from BeautifulSoup import BeautifulSoup

# Debug Variable
debug = False

# Registers FireFox as an available browser
ffpath = 'C:\\Program Files (x86)\\Mozilla Firefox\\firefox.exe'
webbrowser.register('firefox', None, webbrowser.BackgroundBrowser(ffpath), 1)

# Retrieve Array of Words to use from Long Wikipedia Article
random_words = urllib2.urlopen('https://en.wikipedia.org/wiki/1918_New_Year_Honours').read()
clean_words = BeautifulSoup(random_words).text
words = []
for word in clean_words.split():
    words.append(word)
print("Length of Array of Words is " + str(len(words)))

# Function to search Bing and acquire points
def search_bing():
    word1 = random.randint(0,len(words)-1)  # randint's upper bound is inclusive
    word2 = random.randint(0,len(words)-1)
    word3 = random.randint(0,len(words)-1)
    #webbrowser.open_new('https://www.bing.com/search?q='+words[word1]+'+'+words[word2]+'+'+words[word3])
    open_curr_tab('https://www.bing.com/search?q='+words[word1]+'+'+words[word2]+'+'+words[word3])
    print("Done")

# Establish TelNet Session to open FireFox so all searches are contained in one tab.
HOST = 'localhost'
PORT = 4242
Example #28
0
try:
    mode = args.get('mode', None)
except (SyntaxError, TypeError) as e:
    xbmc.log(msg='Error: %s' % str(e), level=xbmc.LOGERROR)

if mode is None:
    li = ''
    response = requests.get(url, headers=headers)
    # response.status
    htm = response.text
    htm = htm.encode('iso-8859-1', 'ignore')
    htm = BeautifulSoup(htm)
    htm = str(htm)

    htm = htm.split('<div id="aul">', 1)[-1]
    htm = htm.split('<a href="/es/majorcompetitions/">', 1)[0]
    htm = [line for line in htm.split('\n') if '<a class="main" ' in line]
    htm = re.sub(r'\t', r'', ''.join(htm))
    htm = re.sub(r'</td>', r'</td>\n', htm)
    htm = re.sub(r'</a><td background=', r'</a>\n<td background=', htm)
    htm = re.sub(r'(?m)^<td background=.*\n?', r'', htm)
    htm = parser.unescape(htm.decode('utf-8').strip())

    query = """<a class=.+?href="(.+?)".+?<b>(.+?)</b>.+?"""
    sports = re.compile(query, re.DOTALL).findall(htm)

    #print (htm)
    #print (sports)

    for s in sports:
Example #29
0
    for page in urlList:
        if docIDCounter > retrieveLimit:
            break #quits crawling if retrieval limit is reached
        try:
            #---------- Page Crawler (gets words and links from each page) ---------
            soup = ""
            browse.open(page)
            if page.endswith(".txt"):
                soup = browse.response().read()
            else:
                soup = BeautifulSoup(browse.response().read()) #if can't parse, assumed to be binary file or 404
                soup = soup.getText()
            hashTest = hashlib.md5(soup.encode('utf-8')).hexdigest()
            if hashTest not in duplicateDetect:
                duplicateDetect.append(hashTest)
                wordsInPage = soup.split()
                if not page.endswith(".txt"):

                    for link in browse.links():
                        tempURL = urlparse.urljoin(link.base_url, link.url)
                        #BELOW: gets rid of duplicate urls resulting from index.html/index.htm
                        if tempURL.endswith("index.html"):
                            tempURL = tempURL.replace("index.html", "")
                        elif tempURL.endswith("index.htm"):
                            tempURL = tempURL.replace("index.htm", "")


                        if tempURL not in urlList:
                            if tempURL.startswith(baseUrl):
                                if robots.can_fetch("*", "/" + link.url): #checks robots.txt, necessary because of unusual robots.txt location
                                    urlList.append(tempURL)
Example #30
0
        if docIDCounter > retrieveLimit:
            break  #quits crawling if retrieval limit is reached
        try:
            #---------- Page Crawler (gets words and links from each page) ---------
            soup = ""
            browse.open(page)
            if page.endswith(".txt"):
                soup = browse.response().read()
            else:
                soup = BeautifulSoup(browse.response().read(
                ))  #if can't parse, assumed to be binary file or 404
                soup = soup.getText()
            hashTest = hashlib.md5(soup.encode('utf-8')).hexdigest()
            if hashTest not in duplicateDetect:
                duplicateDetect.append(hashTest)
                wordsInPage = soup.split()
                if not page.endswith(".txt"):

                    for link in browse.links():
                        tempURL = urlparse.urljoin(link.base_url, link.url)
                        #BELOW: gets rid of duplicate urls resulting from index.html/index.htm
                        if tempURL.endswith("index.html"):
                            tempURL = tempURL.replace("index.html", "")
                        elif tempURL.endswith("index.htm"):
                            tempURL = tempURL.replace("index.htm", "")

                        if tempURL not in urlList:
                            if tempURL.startswith(baseUrl):
                                if robots.can_fetch(
                                        "*", "/" + link.url
                                ):  #checks robots.txt, necessary because of unusual robots.txt location
Example #31
0
def get_event_list(url):
    li = ''
    response = requests.get(url, headers=headers)
    # response.status
    htm = response.text
    htm = htm.encode('iso-8859-1', 'ignore')
    htm = BeautifulSoup(htm)
    image = url.split('/')
    image = media + "/sports/" + image[5] + ".sport.png"
    plot = htm.findAll('span', attrs={'class': re.compile('sltitle')})
    plot = parser.unescape(
        str(plot).replace('\t', '').replace('\n', '').replace('[', '').replace(
            ']', '').decode('utf-8').strip())
    plot = re.sub(r'<[^>]*>', r'', plot)
    htm = str(htm)

    tday = strftime("%-d de %B, %A ", localtime())
    htm = htm.split('<span class="sltitle">', 1)[-1]
    htm = htm.split('<a href="/es/archive/">', 1)[0]
    htm = re.sub(r'\t', r'', ''.join(htm))
    htm = re.sub(r'\n', r'', htm)
    htm = re.sub(r'/icons/', r'\n/icons/', htm)
    htm = re.sub(r'</span>', r'</span>\n', htm)
    htm = [line for line in htm.split('\n') if '/icons/' in line]
    htm = '\n'.join(htm)
    htm = parser.unescape(htm.decode('utf-8').strip())

    query = """/icons/(.+?)".+?<a class=.+?href="(.+?)">(.+?)</a>.+?"evdesc">(.+?)<.+?>(.+?)</span>"""
    events = re.compile(query, re.DOTALL).findall(htm)
    events = list(dict.fromkeys(events))

    #	print (htm2)
    #	print (events)

    for e in events:
        #image = "http://cdn.livetvcdn.net/img/icons/" + e[0]
        hrefs = urlbase + e[1]
        event = e[2]
        time = change_date_format(e[3])
        mins = 1440 - hms_to_m(str(datetime.datetime.now().time())[:-7])
        tnow = datetime.datetime.now() + datetime.timedelta(minutes=30)
        tday = datetime.datetime.now() + datetime.timedelta(minutes=mins)
        desc = e[4]
        #		desc_image = unicodedata.normalize('NFD', desc[1:-1]).encode('ascii', 'ignore')
        #
        #		response_image = google_images_download.googleimagesdownload()
        #		arguments_image = {
        #			"keywords": desc_image[1:-1],
        #			"suffix_keywords": "logo",
        #			"limit": 1,
        #			"format": "jpg",
        #			"output_directory": "storage",
        #			#"image_directory": "pictures",
        #			"no_directory": True,
        #			"no_download": True
        #		}
        #		absolute_image_paths = response_image.download(arguments_image)
        #		image = absolute_image_paths[desc_image[1:-1]+' logo'][0]

        if time < tnow:
            time = time.strftime("%d/%m/%y %H:%M")
            url = build_url({'mode': 'folder', 'foldername': hrefs})
            li = xbmcgui.ListItem('[COLOR lightskyblue](' + time +
                                  ')[/COLOR] [B]' + event +
                                  '[/B] [COLOR lightseagreen]' + desc +
                                  '[/COLOR]')
            li.setArt({
                'fanart': addonID.getAddonInfo('fanart'),
                'icon': image,
                'thumb': image,
                'poster': image
            })
            li.setInfo('video', {'plot': plot})
            xbmcplugin.addSortMethod(handle=addon_handle,
                                     sortMethod=xbmcplugin.SORT_METHOD_LABEL)
            xbmcplugin.addDirectoryItem(handle=addon_handle,
                                        url=url,
                                        listitem=li,
                                        isFolder=True)

        elif time < tday:
            time = time.strftime("%d/%m/%y %H:%M")
            url = ''
            li = xbmcgui.ListItem('[I][COLOR lightskyblue](' + time +
                                  ')[/COLOR] [B]' + event +
                                  '[/B] [COLOR lightseagreen]' + desc +
                                  '[/COLOR][/I]')
            li.setArt({
                'fanart': addonID.getAddonInfo('fanart'),
                'icon': image,
                'thumb': image,
                'poster': image
            })
            li.setInfo('video', {'plot': plot})
            xbmcplugin.addSortMethod(handle=addon_handle,
                                     sortMethod=xbmcplugin.SORT_METHOD_LABEL)
            xbmcplugin.addDirectoryItem(handle=addon_handle,
                                        url=url,
                                        listitem=li,
                                        isFolder=False)

    if li == '':
        image = media + '/33056.png'
        url = ''
        li = xbmcgui.ListItem('[I][B]' + get_localized_string(T_NO_LSTRM) +
                              '[/B][/I]')
        li.setArt({
            'fanart': addonID.getAddonInfo('fanart'),
            'icon': image,
            'thumb': image,
            'poster': image
        })
        li.setInfo('video', {'plot': plot})
        xbmcplugin.addDirectoryItem(handle=addon_handle,
                                    url=url,
                                    listitem=li,
                                    isFolder=False)

    xbmcplugin.endOfDirectory(handle=addon_handle, succeeded=True)