예제 #1
0
def get_guardian_comments_summary(n):
    """Summarize the body of every Guardian article linked from the index page.

    n: forwarded to functions.summarize (presumably the summary length —
    TODO confirm against the functions module).
    Returns a list of summaries, one per article with an extractable body.
    """
    index_url = processor.guardian_urls()
    # De-duplicate the scraped links before fetching each article.
    article_links = set(
        functions.get_all_article_urls(index_url, 'data-link-name', 'article')
    )
    summaries = []
    for article_link in article_links:
        body = functions.get_article(article_link, 'div', 'class', 'content__article-body')
        # Skip pages where no article body could be extracted.
        if not body:
            continue
        print(article_link)
        summary = functions.summarize(body, n, True, 140)
        print(summary)
        summaries.append(summary)
    return summaries
예제 #2
0
def get_guardian_summary(n):
    """Fetch all Guardian articles from the index page and summarize them as one text.

    n: forwarded to functions.summarize (presumably the summary length —
    TODO confirm against the functions module).
    Returns the combined summary produced by functions.summarize.
    """
    # NOTE(review): the sibling get_guardian_comments_summary resolves the index
    # via processor.guardian_urls(); confirm this unqualified call is the same helper.
    url = guardian_urls()
    all_article_urls = list(set(functions.get_all_article_urls(url, 'data-link-name',
                                                               'article')))
    article_texts = []
    for link in all_article_urls:
        article = functions.get_article(link, 'div', 'class',
                                        'content__article-body')
        print(link)
        # get_article can return a falsy value (the sibling function guards the
        # same way); the original crashed here on '+= None'.
        if article:
            article_texts.append(article)
    # join avoids quadratic string concatenation; the original also contained a
    # no-op self-assignment here, which has been dropped.
    all_articles_text = ''.join(article_texts)
    articles_summary = functions.summarize(all_articles_text, n)
    return articles_summary
예제 #3
0
# Extract aligned segment pairs from a TMX translation memory.
# <seg> elements appear to alternate between the two languages (presumably
# English / Nepali given the output extensions — TODO confirm against the TMX),
# so odd positions go to `en` and even positions to `ne`.
tmx_file = "NP8"

tree = etree.parse(tmx_file + ".tmx")
segments = tree.findall(".//seg")

lines = []
en, ne = [], []

for index, segment in enumerate(segments):
    text = etree.tostring(segment, method="text", encoding="unicode")
    cleaned = text.replace("\n", "").strip()

    lines.append(cleaned)
    # Original 1-based counter: first segment -> en, second -> ne, alternating.
    (en if index % 2 == 0 else ne).append(cleaned)

summarize(lines)
summarize(en)
summarize(ne)

write_lines(en, tmx_file + ".en")
write_lines(ne, tmx_file + ".ne")

예제 #4
0
from functions import _read, write_lines, xml_to_text, summarize, length_filter
import os

# Collect every text line from the monolingual XML corpus directory,
# drop lines failing the length filter, report stats, and write the result.
directory = "mono/"

lines = []
for filename in os.listdir(directory):
    base_name = os.path.splitext(filename)[0]
    lines.extend(xml_to_text(directory + base_name))

# 25 is presumably a minimum-length threshold — TODO confirm
# length_filter's semantics in functions.py.
lines = length_filter(lines, 25)

print("Total: ")
summarize(lines)

write_lines(lines, "e-h.ne")
예제 #5
0
import re
import functions

with open('cab/CAB_23_1_0_0059.txt', 'r') as text:
    data = text.read().replace('\n', '. ')
    data = data.replace('..', '')
    data = data.replace('No.', 'No')
    data = data.replace(' '', '')
    data = data.replace(''', '\'')
    data = data.replace('Hon.', 'Hon')
    data = data.replace('.]', '.] ')
    data = data.replace('.(', '. (')
    data = data.replace('\'\'', '"')
    data = re.sub(r'(\w\.)(\w{2,})', r'\1 \2', data)
    data = re.sub(r'(\))(\w)', r'\1 \2', data)
    data = re.sub(r'([a-z])([A-Z])', r'\1 \2', data)
    data = re.sub(r'([A-Z])\.', r'\1', data)
    data = re.sub(r'\.([a-zA-Z])', r'. \1', data)
    data = re.sub(r',([a-zA-Z])', r', \1', data)
    data = re.sub(r'([0-9],)([a-zA-Z])', r'\1, \2', data)
    data = re.sub(r'([a-z])([0-9])', r'\1 \2', data)

print(functions.summarize(data, 1, False, 300))
# --- Build beta deciles and pick the long/short books -----------------------
# Merge the factor loadings onto the investable tickers; the 'Mkt-RF'
# loading is used as the market beta.
betas = Coef.merge(tickers).rename(columns={"Mkt-RF": "beta"})[["ticker", "beta"]].reset_index(drop=True)
# DataFrame.sort() was removed from pandas (0.20+); sort_values is the replacement.
sortCoef = betas.sort_values("beta").reset_index(drop=True)
# Decile size must be an int: '/' yields a float in Python 3 and breaks head()/tail().
N = sortCoef.shape[0] // 10
# Long book: beta deciles 2-4; short book: the three highest-beta deciles.
longTickers = sortCoef.head(4 * N).tail(3 * N)[["ticker"]].sort_values("ticker").reset_index(drop=True)
shortTickers = sortCoef.tail(3 * N)[["ticker"]].sort_values("ticker").reset_index(drop=True)

# NOTE(review): the original called pfOptimizer twice and immediately discarded
# the first result (the BETA_BOUND/WEIGHT_BOUND run); only the effective call
# is kept here.
wTickers = pfOptimizer(longTickers, shortTickers, Coef, StockBeta, 0.4, 0.01)
# Drop negligible positions.
wTickers2 = wTickers[np.abs(wTickers["weight"]) > 0.001]

# thresholds = 0.5 - np.float64(range(11)) * 0.025
thresholds = [0.3, 0.295, 0.29, 0.285, 0.28, 0.275, 0.27, 0.265, 0.26]

(pfValues, pfRecord) = functions.testPortfolio(StockPrices, StockReturns, SP500Prices, StockBeta, wTickers, START)

functions.summarize(pfValues, pfRecord, wTickers[["ticker"]], CAPITAL, BETA_BOUND)

# Daily log returns of the portfolio value series.
pfValues["return"] = np.log(pfValues["value"] / pfValues["value"].shift(1))

# From-inception Sharpe ratio per date (first 20 dates skipped as burn-in).
# Collect rows and build the frame once: DataFrame.append was removed in
# pandas 2.0 and the row-by-row version was O(n^2) anyway.
sharpe_rows = []
for date in pfValues.date[20:]:
    window = pfValues[pfValues["date"] <= date][1:]["return"]
    mean_ret = np.mean(window)
    std_ret = np.std(window)
    sharpe_rows.append({"date": date, "return": mean_ret, "std": std_ret, "ratio": mean_ret / std_ret})
Sharpe = pd.DataFrame(sharpe_rows)

plt.plot(Sharpe["date"], Sharpe["ratio"])
plt.show()

# Benchmark Sharpe over the same sample window.
SP500ReturnsSample = SP500Returns[SP500Returns["endDate"] >= min(pfValues["date"])]
SP500Sharpe = np.mean(SP500ReturnsSample["SP500Return"]) / np.std(SP500ReturnsSample["SP500Return"])
예제 #7
0
import ReadData
import functions
# Missing in the original snippet: both are used below (BUILD_DATE, weights).
import datetime
import numpy as np
# from dateutil import rrule
# from datetime import datetime

# Raw string: '\D' and '\C' are invalid escape sequences in a plain literal
# (SyntaxWarning, and an error in future Pythons); the byte value is unchanged.
DataFolder = r'E:\Dropbox\CQA 2014\Data'
CAPITAL = 1e7  # starting capital
BUILD_DATE = datetime.datetime(2004, 11, 2)

# Load prices, factor data, and betas for the whole universe.
(StockPrices, SP500Prices, CarhartDaily, StockBeta) = ReadData.ReadAll(DataFolder)
StockReturns = ReadData.ReadReturn(DataFolder)
minDate = StockPrices['date'].min()
maxDate = StockPrices['date'].max()
tickers = ReadData.ReadTickers(DataFolder)
# TDS excluded — NOTE(review): presumably a known bad data series (see the
# commented-out debugging below); confirm before re-including.
tickers = tickers[tickers['ticker'] != 'TDS'].reset_index(drop=True)

# Equal-weight benchmark portfolio across the remaining tickers.
wTickers = tickers
wTickers['weight'] = 1/np.float64(wTickers.shape[0])

(pfValues, pfRecord) = functions.testPortfolio(StockPrices, StockReturns, SP500Prices, StockBeta, wTickers, BUILD_DATE)

functions.summarize(pfValues, pfRecord, tickers, CAPITAL, BETA_BOUND=0.5)

# findBug = pfRecord[pfRecord['date'] < datetime.datetime(2006,1,1)]
# findBug = findBug[findBug['value']>500000]
#
# tds = pfRecord[(pfRecord['ticker'] == 'TDS') &
#                (pfRecord['date'] > datetime.datetime(2005,5,12)) &
#                (pfRecord['date'] < datetime.datetime(2005,5,20))]
#
# test = StockReturns[StockReturns['return'] > 1]