def get_article(self, url):
    '''Returns an Article representing the article at url.'''
    # Phase 1: fetch the page and scrape the raw headline/body/date.
    try:
        html = helpers.get_content(url)
        if not html:
            return None
        soup = BeautifulSoup(html)
        headline = self.get_headline(soup)
        body = self.get_body(soup)
        date = self.get_date(soup)
    except Exception as exc:
        logger.log.error("Hit exception on line number %s getting article for %s:"
                         " %s" % (sys.exc_info()[-1].tb_lineno, url, exc))
        return None
    # Phase 2: normalize the scraped strings to clean text.
    try:
        headline, body, date = (helpers.decode(field)
                                for field in (headline, body, date))
    except Exception as exc:
        logger.log.error('Error on line %s decoding url %s: %s' %
                         (sys.exc_info()[-1].tb_lineno, url, exc))
        return None
    logger.log.info('URL: %s' % url)
    logger.log.info('headline: %s' % headline)
    logger.log.info('Body: %s' % body)
    return news_interface.Article(headline, body, url, self.news_org, date)
def get_article(self, url):
    '''Implementation for getting an article from the NYTimes.

    url: A URL in the ny_times.com domain.

    Returns: The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    headline = helpers.decode(soup.h1.string)
    try:
        container = soup.find('div', attrs={'class': 'articleBody'})
        paragraphs = container.find_all('p', attrs={'itemprop': 'articleBody'})
    except AttributeError:
        # this article's html uses different attributes... sigh...
        # Hopefully there are only 2 versions
        container = soup.find('div', attrs={'class': 'story-body'})
        paragraphs = container.find_all('p', attrs={'class': 'story-content'})
    body = ' '.join(helpers.decode(p.get_text()) for p in paragraphs)
    log.info(headline)
    log.info(body)
    return news_interface.Article(headline, body, url, news_orgs.NY_TIMES)
def get_article(self, url):
    '''Returns an Article representing the article at url.'''
    try:
        page = helpers.get_content(url)
        if not page:
            return None
        soup = BeautifulSoup(page)
        headline = self.get_headline(soup)
        body = self.get_body(soup)
        date = self.get_date(soup)
    except Exception as e:
        # Scraping failed somewhere; log the offending line and give up.
        logger.log.error(
            "Hit exception on line number %s getting article for %s:"
            " %s" % (sys.exc_info()[-1].tb_lineno, url, e))
        return None
    try:
        # Normalize all three scraped fields in one pass.
        decoded = [helpers.decode(value) for value in (headline, body, date)]
        headline, body, date = decoded
    except Exception as e:
        logger.log.error('Error on line %s decoding url %s: %s' %
                         (sys.exc_info()[-1].tb_lineno, url, e))
        return None
    logger.log.info('URL: %s' % url)
    logger.log.info('headline: %s' % headline)
    logger.log.info('Body: %s' % body)
    return news_interface.Article(headline, body, url, self.news_org, date)
def get_article(self, url):
    '''Implementation for getting an article from the Russia Today.

    url: A URL in the russia_today.com domain.

    Returns: The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    headline = helpers.decode(soup.h1.string)
    container = soup.find('div', attrs={'class': 'cont-wp'})
    texts = [helpers.decode(p.get_text())
             for p in container.find_all('p', attrs={'class': None})]
    # Get rid of 'Tags' and 'Trends' headers, and 'READ MORE' links
    kept = [t for t in texts
            if not t.startswith('\nREAD') and t != 'Tags' and t != 'Trends']
    body = ' '.join(kept)
    log.info(headline)
    log.info(body)
    return news_interface.Article(headline, body, url, news_orgs.RUSSIA_TODAY)
def get_ph_roc(self, n_triggers, roc):
    """Measure the per-pixel pulse height of a ROC, normalized by hit count.

    Runs a calibration with n_triggers triggers, decodes the hit counts and
    pulse-height sums into per-pixel maps, and stores ph_sum / n_hits in
    roc.data. Pixels with zero hits produce inf/nan, which nan_to_num maps
    to finite values.
    """
    self.select_roc(roc)
    n_hits = []
    ph_sum = []
    address = []
    self.logger.debug('PH %s , n_triggers: %s' % (roc, n_triggers))
    self.calibrate(n_triggers, n_hits, ph_sum, address)
    cals = decode(roc.n_cols, roc.n_rows, address, n_hits)
    phs = decode(roc.n_cols, roc.n_rows, address, ph_sum)
    # Allow division by 0 for pixels that saw no hits.
    # BUG FIX: the original set divide='raise' (the opposite of the stated
    # intent) and then restored the previous error state *before* the
    # division, so the setting never applied. Suppress the warnings around
    # the division and restore the caller's error state afterwards.
    old_err_state = numpy.seterr(divide='ignore', invalid='ignore')
    try:
        roc.data = numpy.nan_to_num(numpy.divide(phs, cals))
    finally:
        numpy.seterr(**old_err_state)
def translate_text(text, target_language):
    """Translate text into target_language via the Google Translate API.

    Honors the module-level option flags: opt_c prints detection confidence,
    opt_b suppresses the verbose labels, opt_s speaks the translation.
    Exits the process on oversized input, auth failure, or API error.
    """
    if len(text) > 10000:
        print('Error: Text too large. Maximum: 10000 characters')
        sys.exit()
    try:
        client = translate.Client()
    except Exception:
        # Could not build a client (likely missing credentials): show help.
        # (Was a bare `except:`, which also swallowed SystemExit.)
        credentials()
        sys.exit()
    try:
        confidence = client.detect_language(text)
        result = client.translate(text, target_language)
        if opt_c:
            # end='' keeps the value on the same line (the original used a
            # py2-style trailing comma, which does nothing in py3).
            print('Detected language confidence: ', end='')
            print('{:.2%}'.format(confidence['confidence']))  # convert to %
        if not opt_b:
            if result['detectedSourceLanguage'] != target_language:
                print_language_name(result['detectedSourceLanguage'])
                print(result['input'])
            print_language_name(target_language)
        # BUG FIX: the original called .encode('utf-8') on print()'s return
        # value (None), raising AttributeError on every translation.
        print(decode(result['translatedText']))
        if opt_s:
            text_to_speech(result['translatedText'], target_language)
    except Exception as e:
        print('Error: ', end='')
        print(e)
        sys.exit()
def image():
    """Handle an image upload: save it, decode it, delete it, report result.

    Reads the 'image' field from the request, writes it under a
    timestamped name, runs decode() on the file, removes the temp file,
    and returns a Response describing the outcome.
    """
    i = request.files['image']  # get the image
    f = ('%s.jpeg' % time.strftime("%Y%m%d-%H%M%S"))
    # Build the path once instead of re-formatting it three times.
    path = '%s/%s' % (PATH_TO_TEST_IMAGES_DIR, f)
    i.save(path)
    data = decode(path)
    os.remove(path)
    # BUG FIX: the response text used '/n' where a newline was intended.
    return Response("%s saved.\n data: %s " % (f, data))
def get_body(self, soup):
    """Return the article body text scraped from the parsed page."""
    container = soup.find("div", attrs={"class": "cont-wp"})
    texts = [helpers.decode(p.get_text())
             for p in container.find_all("p", attrs={"class": None})]
    # Get rid of 'Tags' and 'Trends' headers, and 'READ MORE' links
    return " ".join(t for t in texts
                    if not t.startswith("\nREAD")
                    and t != "Tags" and t != "Trends")
def get_article(self, url):
    '''Implementation for getting an article from USA Today.

    url: A URL in the http://www.usatoday.com/story/* domain.

    Returns: The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    article = soup.article
    headline = helpers.decode(article.h1.string)
    body = ' '.join(helpers.decode(p.get_text())
                    for p in article.find_all('p', attrs={'class': None}))
    return news_interface.Article(headline, body, url, news_orgs.USA_TODAY)
def get_body(self, soup):
    """Return the article body, skipping navigation/teaser paragraphs."""
    container = soup.find('div', attrs={'class': 'cont-wp'})
    paragraphs = container.find_all('p', attrs={'class': None})

    def _wanted(text):
        # Get rid of 'Tags' and 'Trends' headers, and 'READ MORE' links
        return not (text.startswith('\nREAD')
                    or text == 'Tags' or text == 'Trends')

    decoded = [helpers.decode(p.get_text()) for p in paragraphs]
    return ' '.join(filter(_wanted, decoded))
def get_article(self, url):
    '''Implementation for getting an article from the New York Post.

    url: A URL in the nypost.com domain.

    Returns: The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    headline = helpers.decode(soup.h1.a.string)
    content_div = soup.find('div', attrs={'class': 'entry-content'})
    body = ' '.join(helpers.decode(p.get_text())
                    for p in content_div.find_all('p', attrs={'class': None}))
    log.info(headline)
    log.info(body)
    return news_interface.Article(headline, body, url, news_orgs.NY_POST)
def get_article(self, url):
    '''Implementation for getting an article from Al Jazeera.

    Args:
      url: A URL in the aljazeera.* domain.

    Returns:
      The Article representing the article at that url, or None if
      unable to get the Article.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    # The headline <h1> class differs between article templates; try each.
    headline = None
    for h1_class in ("heading-story", "articleOpinion-title"):
        h1 = soup.find("h1", {"class": h1_class})
        if h1 is not None:
            headline = h1.string
            break
    if not headline:
        log.error(
            'Exception trying to scrape Al Jazeera headline from %s' % (url))
        return None
    headline = helpers.decode(headline)
    # The body container class also varies between templates.
    try:
        body_div = soup.find("div", {"class": "article-body"})
        article = body_div.findAll("p")
    except AttributeError:
        body_div = soup.find("div", {"class": "text"})
        article = body_div.findAll("p")
    body = ' '.join(helpers.decode(p.text) for p in article)
    return news_interface.Article(headline, body, url, news_orgs.ALJAZEERA)
def get_article(self, url):
    '''Implementation for getting an article from Todays Zaman.

    Args:
      url: A URL in the www.todayszaman.com/* domain.

    Returns:
      The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    # The <title> tag doubles as the headline on this site.
    headline = helpers.decode(soup.find("title").text)
    news_text = soup.find("div", {"id": "newsText"})
    body = ' '.join(helpers.decode(p.text) for p in news_text.findAll("p"))
    log.info(headline)
    log.info(body)
    return news_interface.Article(headline, body, url, news_orgs.TODAYS_ZAMAN)
def get_body(self, soup):
    """Return the article body text, handling both known page layouts."""
    try:
        container = soup.find('div', attrs={'class': 'articleBody'})
        paragraphs = container.find_all('p', attrs={'itemprop': 'articleBody'})
    except AttributeError:
        # this article's html uses different attributes... sigh...
        # Hopefully there are only 2 versions
        container = soup.find('div', attrs={'class': 'story-body'})
        paragraphs = container.find_all('p', attrs={'class': 'story-content'})
    return ' '.join(helpers.decode(p.get_text()) for p in paragraphs)
def decode():
    # Interactive decode loop (Python 2): read review sentences from stdin
    # and print the model's generated summary for each, until EOF.
    print "This is for interactive Version....."
    metadata, idx_q, idx_a = data_utils.ourmodel.data_util.load_data()
    (trainX, trainY), (testX, testY), (validX, validY) = helpers.split_dataset(idx_q, idx_a)
    # Sequence lengths for the model come from the training array shapes.
    model = create_model(metadata, trainX.shape[-1], trainY.shape[-1])
    sess = model.restore_last_session()
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
        #process input strings now"
        inputs = data_utils.ourmodel.data_util.get_tokens(sentence)
        # Drop English stopwords before padding to the model's input length.
        fqtokens = [w for w in inputs if not w in stopwords.words('english')]
        processed_input = data_utils.ourmodel.data_util.zero_pad_single(
            fqtokens, metadata['w2idx'])
        #sess = model.restore_last_session()
        # predict() expects time-major input, hence the transpose.
        output = model.predict(sess, processed_input.T)
        #replies = []
        for ii, ot in zip(processed_input, output.T):
            # Map index sequences back to words for display.
            q = helpers.decode(sequence=ii, lookup=metadata['idx2w'], separator=' ')
            decoded = helpers.decode(sequence=ot, lookup=metadata['idx2w'], separator=' ').split(' ')
            #if decoded.count('unk') == 0:
            # if decoded not in replies:
            print('Review : [{0}]; Summary : [{1}]'.format(
                q, ' '.join(decoded)))
        sys.stdout.flush()
        sentence = sys.stdin.readline()
def get_article(self, url):
    '''Implementation for getting an article from REUTERS.

    url: A URL in the www.reuters.com* domain.

    Returns: The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    # Headline and paragraphs live in the same grid container; look it up
    # once (the original re-ran the identical find() for the paragraphs).
    content_div = soup.find('div', attrs={'class': 'column1 gridPanel grid8'})
    headline = helpers.decode(content_div.h1.string)
    paragraphs = content_div.findAll("p")
    body = ' '.join([helpers.decode(p.text) for p in paragraphs])
    log.info(headline)
    log.info(body)
    return news_interface.Article(headline, body, url, news_orgs.REUTERS)
def get_article(self, url):
    '''Implementation for getting an article from Times of Israel.

    Args:
      url: A URL in the www.timesofisrael.com/* domain.

    Returns:
      The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    headline = helpers.decode(
        soup.find('h1', attrs={'class': 'headline'}).text)
    body = ' '.join(helpers.decode(p.text)
                    for p in soup.findAll("p", {"itemprop": "articleBody"}))
    log.info(headline)
    log.info(body)
    return news_interface.Article(headline, body, url,
                                  news_orgs.TIMES_OF_ISRAEL)
def handleDiscovery(self, dev, isNewDev, isNewData):
    # BLE scan callback: inspects the device's manufacturer-specific
    # advertisement (AD type 255) and raises the alarm when a decoded
    # interest flag matches the configured mask.
    data = dev.getScanData()
    for d in data:
        # NOTE(review): debug output left in; prints for every AD entry.
        print(dev.scanData)
        # 255 == manufacturer-specific data; 29 bytes is the expected
        # payload length for the beacon format this decoder understands.
        if 255 in dev.scanData and len(dev.scanData[255]) == 29:
            print(len(dev.scanData[255]))
            # Hex-dump the payload as "AA BB CC ...". Assumes the payload
            # iterates as 1-char strings (Python 2 style bytes) -- ord()
            # would fail on py3, where bytes iterate as ints; TODO confirm.
            raw_data = " ".join(
                ["{0:0>2X}".format(ord(b)) for b in dev.scanData[255]])
            d = decode(raw_data)  # NOTE(review): shadows the loop variable 'd'
            # Choose the interest mask: config["i"] while the beacon is in
            # search mode, config["a"] otherwise -- presumably
            # 'interested'/'alarm' sets; verify against config definition.
            if d.search:
                m = self.config["i"]
            else:
                m = self.config["a"]
            # Alarm on the first set interest flag present in the mask.
            for i, b in enumerate(d.interests):
                if b:
                    if i in m:
                        return self.start_alarm(dev.rssi)
def text_to_speech(text, lang):
    """Synthesize `text` in language `lang` and play it with afplay.

    Best-effort: a synthesis failure returns silently (preserving the
    original behavior); a playback failure prints an error. The temporary
    mp3 file is removed in both cases.
    """
    try:
        text = decode(text)
        client = texttospeech.TextToSpeechClient()
        input_text = texttospeech.types.SynthesisInput(text=text)
        voice = texttospeech.types.VoiceSelectionParams(
            language_code=lang,
            ssml_gender=texttospeech.enums.SsmlVoiceGender.NEUTRAL)
        audio_config = texttospeech.types.AudioConfig(
            audio_encoding=texttospeech.enums.AudioEncoding.MP3)
        response = client.synthesize_speech(input_text, voice, audio_config)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        return
    try:
        print('🔉')
        with open('output-95af1670a84.mp3', 'wb') as out:
            out.write(response.audio_content)
        os.system('afplay output-95af1670a84.mp3')
        os.system('rm -f output-95af1670a84.mp3')
    except Exception:
        print('Error: Audio could not be played')
        # BUG FIX: the cleanup used 'rm =f' (typo), so rm failed and the
        # temp file leaked on this path; the flag is '-f'.
        os.system('rm -f output-95af1670a84.mp3')
# %% cols_interest = [ 'age', 'ratly', 'rly', 'ptmm', 'pts', 'grd', 'mor', 'ply', 'horm' ] df = df[cols_interest] # %% # Perform classic CPH # We will print the table of coefficients as well as generate a visual representation. #%% # Encode categorical values accordingly. df_dummies = df.copy(deep=True) df_dummies = helpers.decode(df_dummies) df_dummies = pd.get_dummies(df_dummies, columns=['pts', 'grd', 'mor', 'horm']) # Drop reference columns. df_dummies.drop(columns=['pts_1', 'grd_1', 'mor_lobular', 'horm_3+'], inplace=True) # Fix column ordering columns = list(df_dummies.columns.values) columns.append('horm_3-') columns.remove('horm_3-') # Visual representation. fig, ax = survival_analysis.cph(df_dummies, T, C,
def get_headline(self, soup):
    """Return the decoded text of the page's first <h1> element."""
    return helpers.decode(soup.h1.string)
def get_body(self, soup):
    """Join the decoded text of every unclassed <p> in the article tag."""
    paragraphs = soup.article.find_all('p', attrs={'class': None})
    decoded = [helpers.decode(p.get_text()) for p in paragraphs]
    return ' '.join(decoded)
def get_body(self, soup):
    """Build the article body from itemprop='articleBody' paragraphs."""
    return ' '.join(helpers.decode(p.text)
                    for p in soup.findAll("p", {"itemprop": "articleBody"}))
def plot_shap_dependence_int(col, shap_values, X_test, fig=None, ax=None):
    """ Generate plot of SHAP dependence values.

    Parameters
    ----------
    col: string
        Column name of the variable to analyze.

    shap_values:
        SHAP values

    X_test: pandas DataFrame
        Test data. Must correspond to the data used to compute the
        SHAP values.

    fig: figure handle (optional)

    ax: axes handle (optional)

    Returns
    -------
    fig, ax
        Handles to figure and axes of the plot.
    """
    # Resolve figure/axes handles, creating whichever are missing.
    if fig is None and ax is None:
        fig, ax = plt.subplots(1, 1, figsize=[3, 3])
    elif fig is None:
        fig = ax.get_figure()
    elif ax is None:
        ax = fig.gca()

    X_test_decoded = helpers.decode(X_test)

    # Dashed gray reference line at SHAP value 0.
    plt.axhline(y=0, xmin=-10, xmax=10, linewidth=2.5, linestyle='--',
                color=[0.6, 0.6, 0.6])

    shap.dependence_plot(col, shap_values, X_test,
                         display_features=X_test_decoded,
                         alpha=0.5, dot_size=5, x_jitter=1,
                         ax=ax, show=False)
    ax.set_xlabel(" ")
    ax.set_ylabel(" ")

    # Specific formatting per feature: tick spacing for the numeric
    # features, and a y-axis label only on the 'grd' panel.
    tick_spacing = {'age': 20, 'ptmm': 50, 'ply': 10, 'ratly': 0.25}
    if col in tick_spacing:
        ax.xaxis.set_major_locator(
            mpl.ticker.MultipleLocator(tick_spacing[col]))
    elif col == 'grd':
        ax.set_ylabel("SHAP value")

    plt.show()
    return fig, ax
def self_test():
    # Batch evaluation over the held-out test set (Python 2): decode the
    # model's generated summaries, score them with ROUGE-1 / BLEU-1 / F1,
    # print aggregate statistics, and write the per-example table to CSV.
    print " In Test Mode"
    metadata, idx_q, idx_a = data_utils.ourmodel.data_util.load_data()
    (trainX, trainY), (testX, testY), (validX, validY) = helpers.split_dataset(idx_q, idx_a)
    model = create_model(metadata, trainX.shape[-1], trainY.shape[-1])
    # Checkpoint directory depends on cell type and attention flags.
    # NOTE(review): ckpt_paths is assigned but never used below -- the
    # restore presumably picks its own path; confirm intent.
    if FLAGS.celltype == 'GRU':
        if FLAGS.attention == False:
            ckpt_paths = 'ckpt/checkpoint/GRU/noAttention/'
        else:
            ckpt_paths = 'ckpt/checkpoint/GRU/Attention/'
    else:
        if FLAGS.attention == False:
            ckpt_paths = 'ckpt/checkpoint/LSTM/noAttention/'
        else:
            ckpt_paths = 'ckpt/checkpoint/LSTM/Attention/'
    print "Retrieving Last Model State"
    # mmap avoids loading the full test arrays into memory.
    XX = np.load('datasets/test_review.npy', mmap_mode='r')
    YY = np.load('datasets/test_summary.npy', mmap_mode='r')
    # One row per test example:
    # [review, actual summary, generated summary, rouge1, bleu1, f1]
    result = [[0 for x in range(6)] for y in range(XX.shape[0])]
    sess = model.restore_last_session()
    batch_size = 16
    if sess:
        for i in range(0, XX.shape[0], batch_size):
            if (i + 1) + batch_size < XX.shape[0]:
                # predict() expects time-major input, hence the transpose.
                output = model.predict(sess, XX[i:(i + 1) + batch_size].T)
                nn = XX[i:(i + 1) + batch_size]
                for j in range(nn.shape[0]):
                    result[i + j][0] = helpers.decode(sequence=XX[i + j], lookup=metadata['idx2w'], separator=' ')
                    result[i + j][1] = helpers.decode(sequence=YY[i + j], lookup=metadata['idx2w'], separator=' ')
                    result[i + j][2] = helpers.decode(sequence=output.T[j], lookup=metadata['idx2w'], separator=' ')
                    # Empty generations are replaced with a lone 'UNK' token.
                    if len(result[i + j][2]) == 0:
                        result[i + j][2] = ['UNK']
                    if len(result[i + j][1]) != 0:
                        result[i + j][3] = score.rouge_n(
                            result[i + j][2], result[i + j][1], 1)
                        result[i + j][4] = score.bleu(result[i + j][2],
                                                      result[i + j][1], 1)
                        result[i + j][5] = score.f1(result[i + j][3],
                                                    result[i + j][4])
                    else:
                        # No reference summary: all three scores are zero.
                        result[i + j][3] = result[i + j][4] = result[i + j][5] = 0
    df = pd.DataFrame(result)
    df.columns = [
        "Review", "Actual Summary", "Generated Summary", "Rogue1", "Bleu1",
        "F1"
    ]
    # The last (possibly partial) batch was never scored; drop those rows.
    df = df[:-batch_size]
    print("Average Rogue-1 = %.3f, Max Rouge-1 =%.3f,Min Rogue-1 = %.3f" %
          (df["Rogue1"].mean(), df["Rogue1"].max(), df["Rogue1"].min()))
    print("Average Bleu1 = %.3f, Max Bleu1=%.3f,Min Bleu1 = %.3f" %
          (df["Bleu1"].mean(), df["Bleu1"].max(), df["Bleu1"].min()))
    print("Average F1 = %.3f, Max F1=%.3f,Min F1 = %.3f" %
          (df["F1"].mean(), df["F1"].max(), df["F1"].min()))
    # Output CSV name mirrors the checkpoint-path selection above.
    result_file = 'results/default.csv'
    if FLAGS.celltype == 'GRU':
        if FLAGS.attention == False:
            result_file = 'results/GRU_noAttention.csv'
        else:
            result_file = 'results/GRU_Attention.csv'
    else:
        if FLAGS.attention == False:
            result_file = 'results/LSTM_noAttention.csv'
        else:
            result_file = 'results/LSTM_Attention'
    df.to_csv(result_file)
def plot_shap_dependence(col, shap_values, X_test, fig=None, ax=None):
    """ Generate plot of SHAP dependence values.

    Parameters
    ----------
    col: string
        Column name of the variable to analyze.

    shap_values: list
        Each element has an array of SHAP values. Each group of SHAP values
        will be plotted with a different color.

    X_test: pandas DataFrame
        Test data. Must correspond to the data used to compute the
        SHAP values.

    fig: figure handle (optional)

    ax: axes handle (optional)

    Returns
    -------
    fig, ax
        Handles to figure and axes of the plot.
    """
    # Create figure (if necessary).
    if fig is None and ax is None:
        fig, ax = plt.subplots(1, 1, figsize=[2.5, 2.5])
    elif fig is None:
        fig = ax.get_figure()
    elif ax is None:
        ax = fig.gca()

    X_test_decoded = helpers.decode(X_test)

    # Get right colors (one shade of blue per SHAP group, skipping white).
    interval = np.linspace(0, 1, len(shap_values) + 1)
    colors = [mpl.cm.Blues(x) for x in interval][1:]

    # Flip list order to make sure they are plotted in the right order
    # (XGB in the back with darker color, CPH in the front with lighter color)
    # BUG FIX: the original called shap_values.reverse(), mutating the
    # caller's list in place (a second call with the same list would plot
    # the groups in the wrong order). Work on reversed copies instead.
    shap_values = list(reversed(shap_values))
    colors = list(reversed(colors))

    # Dashed gray reference line at SHAP value 0.
    plt.axhline(y=0, xmin=-10, xmax=10, linewidth=2.5, linestyle='--',
                color=[0.6, 0.6, 0.6])
    for (ii, shap_values_curr), color in zip(enumerate(shap_values), colors):
        # The 0.1*ii offset visually separates the groups vertically.
        shap.dependence_plot(col, shap_values_curr + (0.1 * ii), X_test,
                             display_features=X_test_decoded, color=color,
                             interaction_index=None, alpha=0.5, dot_size=7.5,
                             x_jitter=1, ax=ax, show=False)
    ax.set_xlabel(" ")
    ax.set_ylabel(" ")
    ax.set_ylim([-1.5, 2.5])

    # Specific formatting per feature.
    if col == 'age':
        ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(20))
    elif col == 'pts':
        pass
    elif col == 'ptmm':
        ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(50))
    elif col == 'grd':
        ax.set_ylabel("SHAP value")
    elif col == 'ply':
        ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(10))
    elif col == 'horm':
        pass
    elif col == 'ratly':
        ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(0.25))
    elif col == 'rly':
        pass
    elif col == 'mor':
        # Legend only on the 'mor' panel; colors[0] is the darker (XGB)
        # shade after the flip, colors[1] the lighter (CPH) shade.
        cph_line = mpl.lines.Line2D([], [], color=colors[1], marker='o',
                                    label='CPH')
        xgb_line = mpl.lines.Line2D([], [], color=colors[0], marker='o',
                                    label='XGB')
        plt.legend(handles=[cph_line, xgb_line], markerscale=1,
                   frameon=False, fontsize='xx-small')

    plt.show()
    return fig, ax