def get_guardian_comments_summary(n):
    """Summarize the comments of every unique Guardian front-page article.

    Fetches the article index, de-duplicates the links, and for each
    article whose body can be extracted produces a summary (``n``
    sentences, comment mode, 140-char limit). Returns the list of
    per-article comment summaries.
    """
    index_url = processor.guardian_urls()
    unique_links = list(set(functions.get_all_article_urls(index_url, 'data-link-name', 'article')))
    summaries = []
    for link in unique_links:
        body = functions.get_article(link, 'div', 'class', 'content__article-body')
        # Skip articles whose body could not be extracted.
        if not body:
            continue
        print(link)
        summary = functions.summarize(body, n, True, 140)
        print(summary)
        summaries.append(summary)
    return summaries
def get_guardian_summary(n):
    """Summarize all Guardian front-page articles as one combined text.

    Fetches the article index, de-duplicates the links, concatenates
    every article body that could be extracted, and returns a single
    ``n``-sentence summary of the whole corpus.
    """
    # Call through the processor module for consistency with
    # get_guardian_comments_summary; the original bare guardian_urls()
    # would raise NameError unless it was imported separately.
    url = processor.guardian_urls()
    all_article_urls = functions.get_all_article_urls(url, 'data-link-name', 'article')
    all_article_urls = list(set(all_article_urls))
    all_articles_text = ''
    for link in all_article_urls:
        article = functions.get_article(link, 'div', 'class', 'content__article-body')
        print(link)
        # Guard against a failed extraction (falsy/None body); the
        # original concatenated unconditionally, which raises TypeError.
        if article:
            all_articles_text += article
    # (Removed the original no-op self-assignment of all_articles_text.)
    articles_summary = functions.summarize(all_articles_text, n)
    return articles_summary
# Split a TMX bitext file into parallel English (.en) / Nepali (.ne) files.
tmx_file = "NP8"
tree = etree.parse(tmx_file + ".tmx")
segments = tree.findall(".//seg")

lines = []
en, ne = [], []
for index, segment in enumerate(segments):
    text = etree.tostring(segment, method="text", encoding="unicode")
    line = ''.join(text.split("\n")).strip()
    lines.append(line)
    # Segments alternate languages: even positions are English,
    # odd positions are Nepali (same parity as the original 1-based
    # counter with counter % 2 == 1 -> English).
    (en if index % 2 == 0 else ne).append(line)

summarize(lines)
summarize(en)
summarize(ne)
write_lines(en, tmx_file + ".en")
write_lines(ne, tmx_file + ".ne")
from functions import _read, write_lines, xml_to_text, summarize, length_filter
import os

# Convert every XML file under mono/ to plain text lines, keep only the
# lines passing the length filter, and write them to a single output file.
directory = "mono/"
lines = []
for name in os.listdir(directory):
    stem = os.path.splitext(name)[0]
    lines.extend(xml_to_text(directory + stem))

lines = length_filter(lines, 25)
print("Total: ")
summarize(lines)
write_lines(lines, "e-h.ne")
import re
import functions

# Clean OCR noise from a CAB transcript and print a one-sentence summary.
with open('cab/CAB_23_1_0_0059.txt', 'r') as text:
    data = text.read().replace('\n', '. ')

data = data.replace('..', '')
data = data.replace('No.', 'No')
# NOTE(review): the next two literals were mojibake in the original
# (`' ''` / `'''` -- a syntax error as committed). They are assumed to
# strip a curly opening quote and normalize a curly apostrophe to
# ASCII -- confirm against the source transcript.
data = data.replace(' \u2018', '')
data = data.replace('\u2019', '\'')
data = data.replace('Hon.', 'Hon')
data = data.replace('.]', '.] ')
data = data.replace('.(', '. (')
data = data.replace('\'\'', '"')
# Re-insert spacing the OCR lost around punctuation and case boundaries.
data = re.sub(r'(\w\.)(\w{2,})', r'\1 \2', data)
data = re.sub(r'(\))(\w)', r'\1 \2', data)
data = re.sub(r'([a-z])([A-Z])', r'\1 \2', data)
data = re.sub(r'([A-Z])\.', r'\1', data)
data = re.sub(r'\.([a-zA-Z])', r'. \1', data)
data = re.sub(r',([a-zA-Z])', r', \1', data)
data = re.sub(r'([0-9],)([a-zA-Z])', r'\1, \2', data)
data = re.sub(r'([a-z])([0-9])', r'\1 \2', data)
print(functions.summarize(data, 1, False, 300))
# Rank stocks by market beta, build a long/short portfolio around the
# middle deciles, and track its expanding-window Sharpe ratio vs the S&P 500.
betas = (Coef.merge(tickers)
             .rename(columns={"Mkt-RF": "beta"})[["ticker", "beta"]]
             .reset_index(drop=True))
# DataFrame.sort() was removed from pandas; sort_values is the modern call.
sortCoef = betas.sort_values("beta").reset_index(drop=True)
# Integer decile size: plain / yields a float on Python 3, which breaks
# head()/tail().
N = sortCoef.shape[0] // 10
longTickers = (sortCoef.head(4 * N).tail(3 * N)[["ticker"]]
               .sort_values("ticker").reset_index(drop=True))
shortTickers = (sortCoef.tail(3 * N)[["ticker"]]
                .sort_values("ticker").reset_index(drop=True))

# The original first called pfOptimizer with (BETA_BOUND, WEIGHT_BOUND)
# and immediately overwrote the result with this literal-parameter call;
# only the effective call is kept.
wTickers = pfOptimizer(longTickers, shortTickers, Coef, StockBeta, 0.4, 0.01)
wTickers2 = wTickers[np.abs(wTickers["weight"]) > 0.001]

# thresholds = 0.5 - np.float64(range(11)) * 0.025
thresholds = [0.3, 0.295, 0.29, 0.285, 0.28, 0.275, 0.27, 0.265, 0.26]

(pfValues, pfRecord) = functions.testPortfolio(
    StockPrices, StockReturns, SP500Prices, StockBeta, wTickers, START)
functions.summarize(pfValues, pfRecord, wTickers[["ticker"]], CAPITAL, BETA_BOUND)

# Daily log returns of the portfolio value series.
pfValues["return"] = np.log(pfValues["value"] / pfValues["value"].shift(1))

# Expanding-window Sharpe ratio, skipping the first 20 days.
sharpe_rows = []
for date in pfValues.date[20:]:
    window = pfValues[pfValues["date"] <= date][1:]["return"]
    totalReturn = np.mean(window)
    totalStd = np.std(window)
    sharpe_rows.append({"date": date, "return": totalReturn,
                        "std": totalStd, "ratio": totalReturn / totalStd})
# DataFrame.append() was removed from pandas; building the frame once
# from a row list is also O(n) instead of O(n^2).
Sharpe = pd.DataFrame(sharpe_rows)

plt.plot(Sharpe["date"], Sharpe["ratio"])
plt.show()

SP500ReturnsSample = SP500Returns[SP500Returns["endDate"] >= min(pfValues["date"])]
SP500Sharpe = (np.mean(SP500ReturnsSample["SP500Return"])
               / np.std(SP500ReturnsSample["SP500Return"]))
import datetime

import numpy as np

import ReadData
import functions
# from dateutil import rrule

# Build an equal-weight portfolio over all tickers (except TDS) and
# evaluate it from the build date onward.

# Added the missing `datetime` and `numpy` imports the original relied on,
# and made the Windows path a raw string (backslash escapes like \D are
# invalid-escape warnings in a plain literal on modern Python).
DataFolder = r'E:\Dropbox\CQA 2014\Data'
CAPITAL = 1e7
BUILD_DATE = datetime.datetime(2004, 11, 2)

(StockPrices, SP500Prices, CarhartDaily, StockBeta) = ReadData.ReadAll(DataFolder)
StockReturns = ReadData.ReadReturn(DataFolder)
minDate = StockPrices['date'].min()
maxDate = StockPrices['date'].max()

tickers = ReadData.ReadTickers(DataFolder)
# TDS excluded -- see the retained debugging snippets below that chased
# its anomalous values.
tickers = tickers[tickers['ticker'] != 'TDS'].reset_index(drop=True)

# Equal weight across every remaining ticker.
wTickers = tickers
wTickers['weight'] = 1 / np.float64(wTickers.shape[0])

(pfValues, pfRecord) = functions.testPortfolio(
    StockPrices, StockReturns, SP500Prices, StockBeta, wTickers, BUILD_DATE)
functions.summarize(pfValues, pfRecord, tickers, CAPITAL, BETA_BOUND=0.5)

# findBug = pfRecord[pfRecord['date'] < datetime.datetime(2006,1,1)]
# findBug = findBug[findBug['value']>500000]
#
# tds = pfRecord[(pfRecord['ticker'] == 'TDS') &
#                (pfRecord['date'] > datetime.datetime(2005,5,12)) &
#                (pfRecord['date'] < datetime.datetime(2005,5,20))]
#
# test = StockReturns[StockReturns['return'] > 1]