def get_article(self, url):
    '''Returns an Article representing the article at url.'''
    # Phase 1: fetch the page and scrape the raw headline/body/date.
    try:
        html = helpers.get_content(url)
        if not html:
            return None
        soup = BeautifulSoup(html)
        headline = self.get_headline(soup)
        body = self.get_body(soup)
        date = self.get_date(soup)
    except Exception as exc:
        logger.log.error("Hit exception on line number %s getting article for %s:"
                         " %s" % (sys.exc_info()[-1].tb_lineno, url, exc))
        return None
    # Phase 2: normalize the scraped strings to clean text.
    try:
        headline, body, date = (helpers.decode(field)
                                for field in (headline, body, date))
    except Exception as exc:
        logger.log.error('Error on line %s decoding url %s: %s' %
                         (sys.exc_info()[-1].tb_lineno, url, exc))
        return None
    logger.log.info('URL: %s' % url)
    logger.log.info('headline: %s' % headline)
    logger.log.info('Body: %s' % body)
    return news_interface.Article(headline, body, url, self.news_org, date)
def get_article(self, url):
    '''Implementation for getting an article from the NYTimes.

    url: A URL in the ny_times.com domain.

    Returns: The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    headline = helpers.decode(soup.h1.string)
    try:
        container = soup.find('div', attrs={'class': 'articleBody'})
        paragraphs = container.find_all('p', attrs={'itemprop': 'articleBody'})
    except AttributeError:
        # this article's html uses different attributes... sigh...
        # Hopefully there are only 2 versions
        container = soup.find('div', attrs={'class': 'story-body'})
        paragraphs = container.find_all('p', attrs={'class': 'story-content'})
    body = ' '.join(helpers.decode(p.get_text()) for p in paragraphs)
    log.info(headline)
    log.info(body)
    return news_interface.Article(headline, body, url, news_orgs.NY_TIMES)
def get_article(self, url):
    '''Returns an Article representing the article at url.'''
    try:
        page = helpers.get_content(url)
        if not page:
            return None
        soup = BeautifulSoup(page)
        headline = self.get_headline(soup)
        body = self.get_body(soup)
        date = self.get_date(soup)
    except Exception as e:
        # Scraping failed somewhere; log the offending line and give up.
        logger.log.error(
            "Hit exception on line number %s getting article for %s:"
            " %s" % (sys.exc_info()[-1].tb_lineno, url, e))
        return None
    try:
        # Normalize all three scraped fields in one pass.
        decoded = [helpers.decode(value) for value in (headline, body, date)]
        headline, body, date = decoded
    except Exception as e:
        logger.log.error('Error on line %s decoding url %s: %s' %
                         (sys.exc_info()[-1].tb_lineno, url, e))
        return None
    logger.log.info('URL: %s' % url)
    logger.log.info('headline: %s' % headline)
    logger.log.info('Body: %s' % body)
    return news_interface.Article(headline, body, url, self.news_org, date)
def get_article(self, url):
    '''Implementation for getting an article from the Russia Today.

    url: A URL in the russia_today.com domain.

    Returns: The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    headline = helpers.decode(soup.h1.string)
    container = soup.find('div', attrs={'class': 'cont-wp'})
    texts = [helpers.decode(p.get_text())
             for p in container.find_all('p', attrs={'class': None})]
    # Get rid of 'Tags' and 'Trends' headers, and 'READ MORE' links
    kept = [t for t in texts
            if not t.startswith('\nREAD') and t != 'Tags' and t != 'Trends']
    body = ' '.join(kept)
    log.info(headline)
    log.info(body)
    return news_interface.Article(headline, body, url, news_orgs.RUSSIA_TODAY)
def get_ph_roc(self, n_triggers, roc):
    """Measure the per-pixel pulse height of a ROC, normalized by hit count.

    Runs a calibration with n_triggers triggers, decodes the hit counts and
    pulse-height sums into per-pixel maps, and stores ph_sum / n_hits in
    roc.data. Pixels with zero hits produce inf/nan, which nan_to_num maps
    to finite values.
    """
    self.select_roc(roc)
    n_hits = []
    ph_sum = []
    address = []
    self.logger.debug('PH %s , n_triggers: %s' % (roc, n_triggers))
    self.calibrate(n_triggers, n_hits, ph_sum, address)
    cals = decode(roc.n_cols, roc.n_rows, address, n_hits)
    phs = decode(roc.n_cols, roc.n_rows, address, ph_sum)
    # Allow division by 0 for pixels that saw no hits.
    # BUG FIX: the original set divide='raise' (the opposite of the stated
    # intent) and then restored the previous error state *before* the
    # division, so the setting never applied. Suppress the warnings around
    # the division and restore the caller's error state afterwards.
    old_err_state = numpy.seterr(divide='ignore', invalid='ignore')
    try:
        roc.data = numpy.nan_to_num(numpy.divide(phs, cals))
    finally:
        numpy.seterr(**old_err_state)
def translate_text(text, target_language):
    """Translate text into target_language via the Google Translate API.

    Honors the module-level option flags: opt_c prints detection confidence,
    opt_b suppresses the verbose labels, opt_s speaks the translation.
    Exits the process on oversized input, auth failure, or API error.
    """
    if len(text) > 10000:
        print('Error: Text too large. Maximum: 10000 characters')
        sys.exit()
    try:
        client = translate.Client()
    except Exception:
        # Could not build a client (likely missing credentials): show help.
        # (Was a bare `except:`, which also swallowed SystemExit.)
        credentials()
        sys.exit()
    try:
        confidence = client.detect_language(text)
        result = client.translate(text, target_language)
        if opt_c:
            # end='' keeps the value on the same line (the original used a
            # py2-style trailing comma, which does nothing in py3).
            print('Detected language confidence: ', end='')
            print('{:.2%}'.format(confidence['confidence']))  # convert to %
        if not opt_b:
            if result['detectedSourceLanguage'] != target_language:
                print_language_name(result['detectedSourceLanguage'])
                print(result['input'])
            print_language_name(target_language)
        # BUG FIX: the original called .encode('utf-8') on print()'s return
        # value (None), raising AttributeError on every translation.
        print(decode(result['translatedText']))
        if opt_s:
            text_to_speech(result['translatedText'], target_language)
    except Exception as e:
        print('Error: ', end='')
        print(e)
        sys.exit()
def image():
    """Handle an image upload: save it, decode it, delete it, report result.

    Reads the 'image' field from the request, writes it under a
    timestamped name, runs decode() on the file, removes the temp file,
    and returns a Response describing the outcome.
    """
    i = request.files['image']  # get the image
    f = ('%s.jpeg' % time.strftime("%Y%m%d-%H%M%S"))
    # Build the path once instead of re-formatting it three times.
    path = '%s/%s' % (PATH_TO_TEST_IMAGES_DIR, f)
    i.save(path)
    data = decode(path)
    os.remove(path)
    # BUG FIX: the response text used '/n' where a newline was intended.
    return Response("%s saved.\n data: %s " % (f, data))
def get_body(self, soup):
    """Return the article body text scraped from the parsed page."""
    container = soup.find("div", attrs={"class": "cont-wp"})
    texts = [helpers.decode(p.get_text())
             for p in container.find_all("p", attrs={"class": None})]
    # Get rid of 'Tags' and 'Trends' headers, and 'READ MORE' links
    return " ".join(t for t in texts
                    if not t.startswith("\nREAD")
                    and t != "Tags" and t != "Trends")
def get_article(self, url):
    '''Implementation for getting an article from USA Today.

    url: A URL in the http://www.usatoday.com/story/* domain.

    Returns: The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    article = soup.article
    headline = helpers.decode(article.h1.string)
    body = ' '.join(helpers.decode(p.get_text())
                    for p in article.find_all('p', attrs={'class': None}))
    return news_interface.Article(headline, body, url, news_orgs.USA_TODAY)
def get_body(self, soup):
    """Return the article body, skipping navigation/teaser paragraphs."""
    container = soup.find('div', attrs={'class': 'cont-wp'})
    paragraphs = container.find_all('p', attrs={'class': None})

    def _wanted(text):
        # Get rid of 'Tags' and 'Trends' headers, and 'READ MORE' links
        return not (text.startswith('\nREAD')
                    or text == 'Tags' or text == 'Trends')

    decoded = [helpers.decode(p.get_text()) for p in paragraphs]
    return ' '.join(filter(_wanted, decoded))
def get_article(self, url):
    '''Implementation for getting an article from the New York Post.

    url: A URL in the nypost.com domain.

    Returns: The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    headline = helpers.decode(soup.h1.a.string)
    content_div = soup.find('div', attrs={'class': 'entry-content'})
    body = ' '.join(helpers.decode(p.get_text())
                    for p in content_div.find_all('p', attrs={'class': None}))
    log.info(headline)
    log.info(body)
    return news_interface.Article(headline, body, url, news_orgs.NY_POST)
def get_article(self, url):
    '''Implementation for getting an article from Al Jazeera.

    Args:
      url: A URL in the aljazeera.* domain.

    Returns:
      The Article representing the article at that url, or None if
      unable to get the Article.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    # The headline <h1> class differs between article templates; try each.
    headline = None
    for h1_class in ("heading-story", "articleOpinion-title"):
        h1 = soup.find("h1", {"class": h1_class})
        if h1 is not None:
            headline = h1.string
            break
    if not headline:
        log.error(
            'Exception trying to scrape Al Jazeera headline from %s' % (url))
        return None
    headline = helpers.decode(headline)
    # The body container class also varies between templates.
    try:
        body_div = soup.find("div", {"class": "article-body"})
        article = body_div.findAll("p")
    except AttributeError:
        body_div = soup.find("div", {"class": "text"})
        article = body_div.findAll("p")
    body = ' '.join(helpers.decode(p.text) for p in article)
    return news_interface.Article(headline, body, url, news_orgs.ALJAZEERA)
def get_article(self, url):
    '''Implementation for getting an article from Todays Zaman.

    Args:
      url: A URL in the www.todayszaman.com/* domain.

    Returns:
      The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    # The <title> tag doubles as the headline on this site.
    headline = helpers.decode(soup.find("title").text)
    news_text = soup.find("div", {"id": "newsText"})
    body = ' '.join(helpers.decode(p.text) for p in news_text.findAll("p"))
    log.info(headline)
    log.info(body)
    return news_interface.Article(headline, body, url, news_orgs.TODAYS_ZAMAN)
def get_body(self, soup):
    """Return the article body text, handling both known page layouts."""
    try:
        container = soup.find('div', attrs={'class': 'articleBody'})
        paragraphs = container.find_all('p', attrs={'itemprop': 'articleBody'})
    except AttributeError:
        # this article's html uses different attributes... sigh...
        # Hopefully there are only 2 versions
        container = soup.find('div', attrs={'class': 'story-body'})
        paragraphs = container.find_all('p', attrs={'class': 'story-content'})
    return ' '.join(helpers.decode(p.get_text()) for p in paragraphs)
def decode():
    # Interactive decode loop (Python 2): read review sentences from stdin
    # and print the model's generated summary for each, until EOF.
    print "This is for interactive Version....."
    metadata, idx_q, idx_a = data_utils.ourmodel.data_util.load_data()
    (trainX, trainY), (testX, testY), (validX, validY) = helpers.split_dataset(idx_q, idx_a)
    # Sequence lengths for the model come from the training array shapes.
    model = create_model(metadata, trainX.shape[-1], trainY.shape[-1])
    sess = model.restore_last_session()
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
        #process input strings now"
        inputs = data_utils.ourmodel.data_util.get_tokens(sentence)
        # Drop English stopwords before padding to the model's input length.
        fqtokens = [w for w in inputs if not w in stopwords.words('english')]
        processed_input = data_utils.ourmodel.data_util.zero_pad_single(
            fqtokens, metadata['w2idx'])
        #sess = model.restore_last_session()
        # predict() expects time-major input, hence the transpose.
        output = model.predict(sess, processed_input.T)
        #replies = []
        for ii, ot in zip(processed_input, output.T):
            # Map index sequences back to words for display.
            q = helpers.decode(sequence=ii, lookup=metadata['idx2w'], separator=' ')
            decoded = helpers.decode(sequence=ot, lookup=metadata['idx2w'], separator=' ').split(' ')
            #if decoded.count('unk') == 0:
            # if decoded not in replies:
            print('Review : [{0}]; Summary : [{1}]'.format(
                q, ' '.join(decoded)))
        sys.stdout.flush()
        sentence = sys.stdin.readline()
def get_article(self, url):
    '''Implementation for getting an article from REUTERS.

    url: A URL in the www.reuters.com* domain.

    Returns: The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    # Headline and paragraphs live in the same grid container; look it up
    # once (the original re-ran the identical find() for the paragraphs).
    content_div = soup.find('div', attrs={'class': 'column1 gridPanel grid8'})
    headline = helpers.decode(content_div.h1.string)
    paragraphs = content_div.findAll("p")
    body = ' '.join([helpers.decode(p.text) for p in paragraphs])
    log.info(headline)
    log.info(body)
    return news_interface.Article(headline, body, url, news_orgs.REUTERS)
def get_article(self, url):
    '''Implementation for getting an article from Times of Israel.

    Args:
      url: A URL in the www.timesofisrael.com/* domain.

    Returns:
      The Article representing the article at that url.
    '''
    html = helpers.get_content(url)
    if not html:
        return None
    soup = BeautifulSoup(html)
    headline = helpers.decode(
        soup.find('h1', attrs={'class': 'headline'}).text)
    body = ' '.join(helpers.decode(p.text)
                    for p in soup.findAll("p", {"itemprop": "articleBody"}))
    log.info(headline)
    log.info(body)
    return news_interface.Article(headline, body, url,
                                  news_orgs.TIMES_OF_ISRAEL)
def handleDiscovery(self, dev, isNewDev, isNewData):
    # BLE scan callback: inspects the device's manufacturer-specific
    # advertisement (AD type 255) and raises the alarm when a decoded
    # interest flag matches the configured mask.
    data = dev.getScanData()
    for d in data:
        # NOTE(review): debug output left in; prints for every AD entry.
        print(dev.scanData)
        # 255 == manufacturer-specific data; 29 bytes is the expected
        # payload length for the beacon format this decoder understands.
        if 255 in dev.scanData and len(dev.scanData[255]) == 29:
            print(len(dev.scanData[255]))
            # Hex-dump the payload as "AA BB CC ...". Assumes the payload
            # iterates as 1-char strings (Python 2 style bytes) -- ord()
            # would fail on py3, where bytes iterate as ints; TODO confirm.
            raw_data = " ".join(
                ["{0:0>2X}".format(ord(b)) for b in dev.scanData[255]])
            d = decode(raw_data)  # NOTE(review): shadows the loop variable 'd'
            # Choose the interest mask: config["i"] while the beacon is in
            # search mode, config["a"] otherwise -- presumably
            # 'interested'/'alarm' sets; verify against config definition.
            if d.search:
                m = self.config["i"]
            else:
                m = self.config["a"]
            # Alarm on the first set interest flag present in the mask.
            for i, b in enumerate(d.interests):
                if b:
                    if i in m:
                        return self.start_alarm(dev.rssi)
def text_to_speech(text, lang):
    """Synthesize `text` in language `lang` and play it with afplay.

    Best-effort: a synthesis failure returns silently (preserving the
    original behavior); a playback failure prints an error. The temporary
    mp3 file is removed in both cases.
    """
    try:
        text = decode(text)
        client = texttospeech.TextToSpeechClient()
        input_text = texttospeech.types.SynthesisInput(text=text)
        voice = texttospeech.types.VoiceSelectionParams(
            language_code=lang,
            ssml_gender=texttospeech.enums.SsmlVoiceGender.NEUTRAL)
        audio_config = texttospeech.types.AudioConfig(
            audio_encoding=texttospeech.enums.AudioEncoding.MP3)
        response = client.synthesize_speech(input_text, voice, audio_config)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        return
    try:
        print('🔉')
        with open('output-95af1670a84.mp3', 'wb') as out:
            out.write(response.audio_content)
        os.system('afplay output-95af1670a84.mp3')
        os.system('rm -f output-95af1670a84.mp3')
    except Exception:
        print('Error: Audio could not be played')
        # BUG FIX: the cleanup used 'rm =f' (typo), so rm failed and the
        # temp file leaked on this path; the flag is '-f'.
        os.system('rm -f output-95af1670a84.mp3')
# %% cols_interest = [ 'age', 'ratly', 'rly', 'ptmm', 'pts', 'grd', 'mor', 'ply', 'horm' ] df = df[cols_interest] # %% # Perform classic CPH # We will print the table of coefficients as well as generate a visual representation. #%% # Encode categorical values accordingly. df_dummies = df.copy(deep=True) df_dummies = helpers.decode(df_dummies) df_dummies = pd.get_dummies(df_dummies, columns=['pts', 'grd', 'mor', 'horm']) # Drop reference columns. df_dummies.drop(columns=['pts_1', 'grd_1', 'mor_lobular', 'horm_3+'], inplace=True) # Fix column ordering columns = list(df_dummies.columns.values) columns.append('horm_3-') columns.remove('horm_3-') # Visual representation. fig, ax = survival_analysis.cph(df_dummies, T, C,
def get_headline(self, soup):
    """Return the decoded text of the page's first <h1> element."""
    return helpers.decode(soup.h1.string)
def get_body(self, soup):
    """Join the decoded text of every unclassed <p> in the article tag."""
    paragraphs = soup.article.find_all('p', attrs={'class': None})
    decoded = [helpers.decode(p.get_text()) for p in paragraphs]
    return ' '.join(decoded)
def get_body(self, soup):
    """Build the article body from itemprop='articleBody' paragraphs."""
    return ' '.join(helpers.decode(p.text)
                    for p in soup.findAll("p", {"itemprop": "articleBody"}))
def plot_shap_dependence_int(col, shap_values, X_test, fig=None, ax=None):
    """ Generate plot of SHAP dependence values.

    Parameters
    ----------
    col: string
        Column name of the variable to analyze.

    shap_values:
        SHAP values

    X_test: pandas DataFrame
        Test data. Must correspond to the data used to compute the
        SHAP values.

    fig: figure handle (optional)

    ax: axes handle (optional)

    Returns
    -------
    fig, ax
        Handles to figure and axes of the plot.
    """
    # Resolve figure/axes handles, creating whichever are missing.
    if fig is None and ax is None:
        fig, ax = plt.subplots(1, 1, figsize=[3, 3])
    elif fig is None:
        fig = ax.get_figure()
    elif ax is None:
        ax = fig.gca()

    X_test_decoded = helpers.decode(X_test)

    # Dashed gray reference line at SHAP value 0.
    plt.axhline(y=0, xmin=-10, xmax=10, linewidth=2.5, linestyle='--',
                color=[0.6, 0.6, 0.6])

    shap.dependence_plot(col, shap_values, X_test,
                         display_features=X_test_decoded,
                         alpha=0.5, dot_size=5, x_jitter=1,
                         ax=ax, show=False)
    ax.set_xlabel(" ")
    ax.set_ylabel(" ")

    # Specific formatting per feature: tick spacing for the numeric
    # features, and a y-axis label only on the 'grd' panel.
    tick_spacing = {'age': 20, 'ptmm': 50, 'ply': 10, 'ratly': 0.25}
    if col in tick_spacing:
        ax.xaxis.set_major_locator(
            mpl.ticker.MultipleLocator(tick_spacing[col]))
    elif col == 'grd':
        ax.set_ylabel("SHAP value")

    plt.show()
    return fig, ax
def self_test():
    # Batch evaluation over the held-out test set (Python 2): decode the
    # model's generated summaries, score them with ROUGE-1 / BLEU-1 / F1,
    # print aggregate statistics, and write the per-example table to CSV.
    print " In Test Mode"
    metadata, idx_q, idx_a = data_utils.ourmodel.data_util.load_data()
    (trainX, trainY), (testX, testY), (validX, validY) = helpers.split_dataset(idx_q, idx_a)
    model = create_model(metadata, trainX.shape[-1], trainY.shape[-1])
    # Checkpoint directory depends on cell type and attention flags.
    # NOTE(review): ckpt_paths is assigned but never used below -- the
    # restore presumably picks its own path; confirm intent.
    if FLAGS.celltype == 'GRU':
        if FLAGS.attention == False:
            ckpt_paths = 'ckpt/checkpoint/GRU/noAttention/'
        else:
            ckpt_paths = 'ckpt/checkpoint/GRU/Attention/'
    else:
        if FLAGS.attention == False:
            ckpt_paths = 'ckpt/checkpoint/LSTM/noAttention/'
        else:
            ckpt_paths = 'ckpt/checkpoint/LSTM/Attention/'
    print "Retrieving Last Model State"
    # mmap avoids loading the full test arrays into memory.
    XX = np.load('datasets/test_review.npy', mmap_mode='r')
    YY = np.load('datasets/test_summary.npy', mmap_mode='r')
    # One row per test example:
    # [review, actual summary, generated summary, rouge1, bleu1, f1]
    result = [[0 for x in range(6)] for y in range(XX.shape[0])]
    sess = model.restore_last_session()
    batch_size = 16
    if sess:
        for i in range(0, XX.shape[0], batch_size):
            if (i + 1) + batch_size < XX.shape[0]:
                # predict() expects time-major input, hence the transpose.
                output = model.predict(sess, XX[i:(i + 1) + batch_size].T)
                nn = XX[i:(i + 1) + batch_size]
                for j in range(nn.shape[0]):
                    result[i + j][0] = helpers.decode(sequence=XX[i + j], lookup=metadata['idx2w'], separator=' ')
                    result[i + j][1] = helpers.decode(sequence=YY[i + j], lookup=metadata['idx2w'], separator=' ')
                    result[i + j][2] = helpers.decode(sequence=output.T[j], lookup=metadata['idx2w'], separator=' ')
                    # Empty generations are replaced with a lone 'UNK' token.
                    if len(result[i + j][2]) == 0:
                        result[i + j][2] = ['UNK']
                    if len(result[i + j][1]) != 0:
                        result[i + j][3] = score.rouge_n(
                            result[i + j][2], result[i + j][1], 1)
                        result[i + j][4] = score.bleu(result[i + j][2],
                                                      result[i + j][1], 1)
                        result[i + j][5] = score.f1(result[i + j][3],
                                                    result[i + j][4])
                    else:
                        # No reference summary: all three scores are zero.
                        result[i + j][3] = result[i + j][4] = result[i + j][5] = 0
    df = pd.DataFrame(result)
    df.columns = [
        "Review", "Actual Summary", "Generated Summary", "Rogue1", "Bleu1",
        "F1"
    ]
    # The last (possibly partial) batch was never scored; drop those rows.
    df = df[:-batch_size]
    print("Average Rogue-1 = %.3f, Max Rouge-1 =%.3f,Min Rogue-1 = %.3f" %
          (df["Rogue1"].mean(), df["Rogue1"].max(), df["Rogue1"].min()))
    print("Average Bleu1 = %.3f, Max Bleu1=%.3f,Min Bleu1 = %.3f" %
          (df["Bleu1"].mean(), df["Bleu1"].max(), df["Bleu1"].min()))
    print("Average F1 = %.3f, Max F1=%.3f,Min F1 = %.3f" %
          (df["F1"].mean(), df["F1"].max(), df["F1"].min()))
    # Output CSV name mirrors the checkpoint-path selection above.
    result_file = 'results/default.csv'
    if FLAGS.celltype == 'GRU':
        if FLAGS.attention == False:
            result_file = 'results/GRU_noAttention.csv'
        else:
            result_file = 'results/GRU_Attention.csv'
    else:
        if FLAGS.attention == False:
            result_file = 'results/LSTM_noAttention.csv'
        else:
            result_file = 'results/LSTM_Attention'
    df.to_csv(result_file)
def plot_shap_dependence(col, shap_values, X_test, fig=None, ax=None):
    """ Generate plot of SHAP dependence values.

    Parameters
    ----------
    col: string
        Column name of the variable to analyze.

    shap_values: list
        Each element has an array of SHAP values. Each group of SHAP values
        will be plotted with a different color.

    X_test: pandas DataFrame
        Test data. Must correspond to the data used to compute the
        SHAP values.

    fig: figure handle (optional)

    ax: axes handle (optional)

    Returns
    -------
    fig, ax
        Handles to figure and axes of the plot.
    """
    # Create figure (if necessary).
    if fig is None and ax is None:
        fig, ax = plt.subplots(1, 1, figsize=[2.5, 2.5])
    elif fig is None:
        fig = ax.get_figure()
    elif ax is None:
        ax = fig.gca()

    X_test_decoded = helpers.decode(X_test)

    # Get right colors (one shade of blue per SHAP group, skipping white).
    interval = np.linspace(0, 1, len(shap_values) + 1)
    colors = [mpl.cm.Blues(x) for x in interval][1:]

    # Flip list order to make sure they are plotted in the right order
    # (XGB in the back with darker color, CPH in the front with lighter color)
    # BUG FIX: the original called shap_values.reverse(), mutating the
    # caller's list in place (a second call with the same list would plot
    # the groups in the wrong order). Work on reversed copies instead.
    shap_values = list(reversed(shap_values))
    colors = list(reversed(colors))

    # Dashed gray reference line at SHAP value 0.
    plt.axhline(y=0, xmin=-10, xmax=10, linewidth=2.5, linestyle='--',
                color=[0.6, 0.6, 0.6])
    for (ii, shap_values_curr), color in zip(enumerate(shap_values), colors):
        # The 0.1*ii offset visually separates the groups vertically.
        shap.dependence_plot(col, shap_values_curr + (0.1 * ii), X_test,
                             display_features=X_test_decoded, color=color,
                             interaction_index=None, alpha=0.5, dot_size=7.5,
                             x_jitter=1, ax=ax, show=False)
    ax.set_xlabel(" ")
    ax.set_ylabel(" ")
    ax.set_ylim([-1.5, 2.5])

    # Specific formatting per feature.
    if col == 'age':
        ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(20))
    elif col == 'pts':
        pass
    elif col == 'ptmm':
        ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(50))
    elif col == 'grd':
        ax.set_ylabel("SHAP value")
    elif col == 'ply':
        ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(10))
    elif col == 'horm':
        pass
    elif col == 'ratly':
        ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(0.25))
    elif col == 'rly':
        pass
    elif col == 'mor':
        # Legend only on the 'mor' panel; colors[0] is the darker (XGB)
        # shade after the flip, colors[1] the lighter (CPH) shade.
        cph_line = mpl.lines.Line2D([], [], color=colors[1], marker='o',
                                    label='CPH')
        xgb_line = mpl.lines.Line2D([], [], color=colors[0], marker='o',
                                    label='XGB')
        plt.legend(handles=[cph_line, xgb_line], markerscale=1,
                   frameon=False, fontsize='xx-small')

    plt.show()
    return fig, ax