예제 #1
0
def write_h2h():
    """Write one head-to-head vector file per team and date index.

    For every date index in ``0 .. len(dates)`` (inclusive) and every team
    of each conference, a file ``h2h/<date_i>_<team>.dat`` is written with
    ``np.savetxt``.  Slot ``j`` of the vector holds
    ``h2h(date_i, i, j, conference)`` for opponent ``j`` of the same
    conference; the team's own slot is left at 0.  After each date index
    the progress bar is refreshed through ``hlp.printProgress``.
    """
    out_dir = 'h2h/'
    n_dates = len(dates)

    for date_i in range(n_dates + 1):
        # East teams are processed first, then West teams.
        for teams, conference in ((EastTeams, "east"), (WestTeams, "west")):
            n_teams = len(teams)
            for i, team in enumerate(teams):
                # Integer-typed vector of zeros.
                # NOTE(review): a fractional h2h() result would be truncated
                # on assignment -- confirm h2h() returns integers.
                row = np.zeros(n_teams, dtype=int)
                for j in range(n_teams):
                    if j != i:
                        row[j] = h2h(date_i, i, j, conference)
                np.savetxt(out_dir + str(date_i) + '_' + team + '.dat',
                           row,
                           delimiter=' ')

        # NOTE(review): the total passed here is len(dates) - 1 although
        # date_i runs up to len(dates) -- confirm printProgress tolerates it.
        hlp.printProgress(date_i,
                          n_dates - 1,
                          prefix='Progress:',
                          barLength=50)
예제 #2
0
def downloadStock(company, date_from, date_to):
    """Download quotes for *company* from the Finam export service.

    Args:
        company: Company key; one of ``'sberbank'``, ``'gazprom'`` or
            ``'dm'`` (the value is passed through ``str()`` first).
        date_from: First day of the range as ``'day/month/year'``.
        date_to: Last day of the range as ``'day/month/year'``.

    Returns:
        A tuple ``(stocks_dates, stocks, stocks_count)``: the first and the
        third comma-separated field of every data row, and the number of
        rows collected.

    Raises:
        ValueError: If *company* is not one of the supported keys.
    """
    # Finam ticker code and instrument id ("em") for each supported company.
    instruments = {
        'sberbank': ('SBER', '3'),
        'gazprom': ('GAZP', '16842'),
        'dm': ('DSKY', '473181'),
    }

    company = str(company)
    if company not in instruments:
        # Fail early with a clear message instead of an UnboundLocalError
        # when the URL is assembled.
        raise ValueError('Unknown company {!r}; expected one of: {}'.format(
            company, ', '.join(sorted(instruments))))
    code, em = instruments[company]

    df, mf, yf, datef = _finam_date_fields(date_from)
    dt, mt, yt, datet = _finam_date_fields(date_to)

    print('Downloading stocks...')
    sys.stdout.flush()

    url = 'http://export.finam.ru/stock.txt?market=1&em={}&code={}&apply=0&df={}&mf={}&yf={}&from={}&dt={}&mt={}&yt={}&to={}&p=8&f=stock_1&e=.txt&cn={}&dtf=4&tmf=3&MSOR=1&mstime=on&mstimever=1&sep=1&sep2=1&datf=5&at=1'.format(
        em, code, df, mf, yf, datef, dt, mt, yt, datet, company)

    # The context manager closes the HTTP response once the body is read.
    with urlopen(url) as response:
        data = response.read().decode("utf-8").split('\r\n')

    stocks_dates = []
    stocks = []
    stocks_count = 0
    progress_total = len(data) - 1

    # The first element and the last element of the split are not data rows
    # (presumably a header line and the empty string after the final CRLF).
    for row in data[1:-1]:
        printProgress(stocks_count, progress_total)
        fields = row.split(',')
        stocks_dates.append(fields[0])
        stocks.append(fields[2])
        stocks_count += 1

    printProgress(stocks_count, stocks_count, True)
    print('Done!')
    sys.stdout.flush()

    return stocks_dates, stocks, stocks_count


def _finam_date_fields(date):
    """Split a 'day/month/year' string into the URL fields Finam expects.

    Returns ``(day, month, year, dotted)`` where *day* has its leading
    zeros removed, *month* is zero-based, and *dotted* is the input date
    with ``'.'`` separators.
    """
    parts = date.split('/')
    day = parts[0].lstrip('0')
    month = str(int(parts[1].lstrip('0')) - 1)
    year = parts[2]
    dotted = parts[0] + '.' + parts[1] + '.' + parts[2]
    return day, month, year, dotted
예제 #3
0
def stem(news_dates, news, news_count):
    """Merge the news of each date into one string of stemmed words.

    The input must be grouped by date: all items sharing a date have to be
    adjacent in *news_dates*, because the items are consumed in a single
    forward pass.  Dates must be hashable (e.g. strings).

    Args:
        news_dates: Date of every news item, parallel to *news*.
        news: Text of every news item.
        news_count: Number of leading items of *news* to process.

    Returns:
        A tuple ``(stems_dates, stems, stems_count)``: the distinct dates in
        first-seen order, one space-joined string of stems per date, and
        the number of distinct dates.
    """
    # Order-preserving de-duplication; the set keeps the membership test
    # O(1) instead of rescanning the result list for every date.
    seen_dates = set()
    stems_dates = []
    for date in news_dates:
        if date not in seen_dates:
            seen_dates.add(date)
            stems_dates.append(date)
    stems_count = len(stems_dates)
    stems = []

    print('Stemming news...')
    sys.stdout.flush()

    # Characters dropped by the tokenizer: punctuation, dashes, digits and
    # lowercase Latin letters.  Built once rather than for every news item.
    token_filters = (''.join(punctuation) +
                     '–—01234567890abcdefghijklmnopqrstuvwxyz')

    j = 0  # index of the next unprocessed news item
    for i, date in enumerate(stems_dates):
        printProgress(i, stems_count)
        day_stems = []

        # Consume every consecutive news item published on this date.
        while j < news_count and news_dates[j] == date:
            words = text_to_word_sequence(news[j], filters=token_filters)
            for word in words:
                if word not in stemmer.stopwords and word != ' ':
                    day_stems.append(stemmer.stem(word))
            j += 1

        stems.append(' '.join(day_stems))

    printProgress(stems_count, stems_count, True)
    print('Done!')
    sys.stdout.flush()

    return stems_dates, stems, stems_count
예제 #4
0
def write_gamesleft(start_date=161):
    """Write one games-left vector file per team and date index.

    For every date index in ``start_date .. len(dates)`` (inclusive) and
    every team, a file ``gamesleft/<date_i>_<team>.dat`` is written with
    ``np.savetxt``.  The vector first lists the opponents of the team's own
    conference (the team's own slot stays 0) and then the opponents of the
    other conference, each entry being the result of ``gamesleft(...)``.
    After each date index the progress bar is refreshed through
    ``hlp.printProgress``.

    Args:
        start_date: First date index to process.  Defaults to 161, the
            value that used to be hard-coded.
    """
    root = 'gamesleft/'
    east = ("east", EastTeams)
    west = ("west", WestTeams)

    for date_i in range(start_date, len(dates) + 1):
        # East teams are processed first, then West teams.
        for (conf, teams), (other_conf, other_teams) in ((east, west),
                                                         (west, east)):
            n_own = len(teams)
            n_other = len(other_teams)
            for i, team in enumerate(teams):
                # Integer-typed vector of zeros.
                # NOTE(review): a non-integer gamesleft() result would be
                # truncated on assignment -- confirm it returns integers.
                gij = np.zeros(n_own + n_other, dtype=int)

                # Own-conference opponents fill slots 0 .. n_own - 1.
                for j in range(n_own):
                    if i != j:
                        gij[j] = gamesleft(date_i, i, conf, j, conf)

                # Other-conference opponents follow directly after them.
                for j in range(n_other):
                    gij[n_own + j] = gamesleft(date_i, i, conf, j, other_conf)

                np.savetxt(root + str(date_i) + '_' + team + '.dat',
                           gij,
                           delimiter=' ')

        hlp.printProgress(date_i,
                          len(dates) - 1,
                          prefix='Progress:',
                          barLength=50)
예제 #5
0
def downloadNews(company, amount, max_retries=10):
    """Download the news articles listed for *company* on mfd.ru.

    Every row returned by ``getTrs(company, amount)`` is followed to its
    article page.  An article is kept only when its text is non-empty and
    its "related companies" block contains exactly one link.

    Args:
        company: ``'sberbank'`` or ``'gazprom'`` (mapped to the ids ``'1'``
            and ``'3'``); any other value is handed to ``getTrs`` unchanged.
        amount: Passed to ``getTrs`` after conversion with ``int()``.
        max_retries: How many times a failing row is retried before it is
            skipped.  ``None`` retries forever.

    Returns:
        A tuple ``(news_dates, news, news_count)``.  Dates are formatted
        ``'dd/mm/yy'``; both lists are returned in reverse listing order.
    """
    domain = 'http://mfd.ru'
    news_dates = []
    news = []
    news_count = 0

    if company == 'sberbank':
        company = '1'
    elif company == 'gazprom':
        company = '3'

    amount = int(amount)
    sys.stdout.flush()
    trs = getTrs(company, amount)
    total = len(trs)

    print('Downloading news...')

    current = 0
    failures = 0  # consecutive failures for the current row
    while current < total:
        printProgress(current, total)
        try:
            item_date, item_string, related_links = _read_news_item(
                trs[current], domain)
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate so the download can be aborted.
            failures += 1
            if max_retries is not None and failures > max_retries:
                # The row keeps failing (e.g. an unexpected page layout):
                # skip it rather than retrying forever.
                current += 1
                failures = 0
            time.sleep(delay_except)
            continue

        if item_string != '' and related_links == 1:
            news_dates.append(item_date)
            news.append(item_string)
            news_count += 1

        current += 1
        failures = 0
        time.sleep(delay)

    printProgress(total, total, True)
    print('Done!')
    sys.stdout.flush()

    return news_dates[::-1], news[::-1], news_count


def _format_news_date(raw_date):
    """Convert a listing date label to the 'dd/mm/yy' format."""
    if raw_date == 'сегодня':  # "today"
        return datetime.date.today().strftime('%d/%m/%y')
    if raw_date == 'вчера':  # "yesterday"
        yesterday = datetime.date.today() - datetime.timedelta(1)
        return yesterday.strftime('%d/%m/%y')
    # Otherwise the label is dot-separated; keep the year's last two digits.
    parts = raw_date.split('.')
    return '{}/{}/{}'.format(parts[0], parts[1], parts[2][2:])


def _read_news_item(row, domain):
    """Fetch the article linked from one listing row.

    Returns ``(item_date, item_string, related_links)``: the formatted
    date, the article text, and the number of links found in the
    ``mfd-related-companies`` block.
    """
    td = row.findAll('td')
    item_date = _format_news_date(td[0].getText().split(',')[0].strip())

    item_url = domain + td[1].find('a').get('href')
    # The context manager closes the HTTP response after parsing.
    with urlopen(item_url) as item_page:
        item_bs = BeautifulSoup(item_page, 'html.parser')

    item_single = item_bs.find('div', {
        'class': 'mfd-related-companies'
    }).findAll('a')
    item_data = item_bs.find('div', {
        'class': 'm-content'
    }).findAll('p')

    # The first paragraph and the last two paragraphs are left out.
    # Joining once and stripping once keeps a space between paragraphs;
    # stripping after every paragraph glued adjacent paragraphs together.
    paragraphs = [p.getText() for p in item_data[1:-2]]
    item_string = ' '.join(paragraphs).strip()

    return item_date, item_string, len(item_single)