Example #1
def load_time_series(symbol, start_date=None, end_date=None, downsample_days=1):
    log.info("loading %s for %s to %s" % (symbol, start_date, end_date))
    filename = "%s.csv" % symbol
    if not os.path.exists(filename):
        fetch.fetch_data(symbol)
    data = pandas.read_csv(filename, parse_dates=True, index_col=0)
    data = data.drop(["Open", "High", "Low", "Close", "Volume"], axis=1)
    data = data.rename(columns={"Adj Close" : symbol})
    data = data.sort_index()
    if start_date is not None and data.index[0] > start_date:
        log.warning("no data for %s before %s" % (symbol, data.index[0]))
        return None
        
    data = data.truncate(before=start_date, after=end_date)
    log.info("%d rows after truncating" % len(data))

    # downsample if necessary
    if downsample_days > 1:
        drange = pandas.date_range(start_date, end_date, freq="%dD" % downsample_days)
        grouped = data.groupby(drange.asof)
        means = grouped.mean()
        log.info("%d rows after downsampling" % len(means))
        return means
    else:
        return data
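A minimal usage sketch for the loader above; the ticker symbol, the date range, and the downsampling window are illustrative assumptions, not part of the original project:

import datetime

# Hypothetical call: ten years of adjusted closes, downsampled to 5-day means.
prices = load_time_series("SPY",
                          start_date=datetime.datetime(2010, 1, 1),
                          end_date=datetime.datetime(2020, 1, 1),
                          downsample_days=5)
if prices is not None:
    print(prices.head())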
Example #2
def load_time_series(symbol,
                     start_date=None,
                     end_date=None,
                     downsample_days=1):
    log.info("loading %s for %s to %s" % (symbol, start_date, end_date))
    filename = "%s.csv" % symbol
    if not os.path.exists(filename):
        fetch.fetch_data(symbol)
    data = pandas.read_csv(filename, parse_dates=True, index_col=0)
    data = data.drop(["Open", "High", "Low", "Close", "Volume"], axis=1)
    data = data.rename(columns={"Adj Close": symbol})
    data = data.sort_index()
    if start_date is not None and data.index[0] > start_date:
        log.warning("no data for %s before %s" % (symbol, data.index[0]))
        return None

    data = data.truncate(before=start_date, after=end_date)
    log.info("%d rows after truncating" % len(data))

    # downsample if necessary
    if downsample_days > 1:
        drange = pandas.date_range(start_date,
                                   end_date,
                                   freq="%dD" % downsample_days)
        grouped = data.groupby(drange.asof)
        means = grouped.mean()
        log.info("%d rows after downsampling" % len(means))
        return means
    else:
        return data
Example #3
def get_new_data():
    print("++++++++++\nIn get_new_data module ...")
    for api in cfg.config['apis']:
        data = fetch.fetch_data(s_url=api['url'], l_filter=api['filter'])
        raw_posts = parse_feed(data)
        posts = munge_feed(raw_posts)
        db_insert(posts)
        time.sleep(1)
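The loop above assumes cfg.config['apis'] is a list of dictionaries, each with a 'url' and a 'filter' key; those two keys are implied by the call, while the values below are placeholders only:

# Hypothetical shape of the cfg module's config; only the 'url' and
# 'filter' keys are implied by the fetch.fetch_data() call above.
config = {
    'apis': [
        {'url': 'https://example.com/feed1.rss', 'filter': ['python', 'pandas']},
        {'url': 'https://example.com/feed2.rss', 'filter': []},
    ]
}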
Example #4
def get_weather(root):

    create_table()

    city = textField.get()
    api_data = fetch_data(city)

    if api_data['cod'] == '404':

        print('\n```````````````````````````````````````')
        print(city + ' ' + api_data['message'])
        print('```````````````````````````````````````')

        final_info = 'null' + '\n' + 'null'
        final_data = ('\nCity name: ' + 'null' + '\nCity id: ' + 'null' + '\n' +
                      '\nHumidity: ' + 'null' + '\nWind speed: ' + 'null' +
                      '\nVisibility: ' + 'null')

        label1.config(text=final_info)
        label2.config(text=final_data)

    else:

        weather_desc = str(api_data['weather'][0]['description'])
        temp_city = str(float(api_data['main']['temp']) - 273.15)[:4] + '°C'
        humid = str(api_data['main']['humidity']) + '%'
        wind_spd = str(api_data['wind']['speed']) + ' kmph'
        city_name = str(api_data['name'])
        city_id = str(api_data['id'])
        date_time = '[' + str(
            datetime.now().strftime("%d-%b-%Y | %I:%M %p")) + ']'
        visibility = str(float(api_data['visibility']) / 1000)[:4] + ' km'

        insert_into_table(city_id, city_name, date_time, temp_city,
                          weather_desc, wind_spd, humid, visibility)

        print(
            '\n\n------------------------------------------------------------------------------------'
        )
        print('Weather stats for -> {} | City-id : {} | {}'.format(
            city_name, city_id, date_time))
        print(
            '------------------------------------------------------------------------------------\n'
        )

        print('Current Temperature   :  {}'.format(temp_city))
        print('Weather Description   :  {}'.format(weather_desc))
        print('Wind Speed            :  {}'.format(wind_spd))
        print('Humidity              :  {}'.format(humid))
        print('Visibility            :  {}\n'.format(visibility))

        final_info = weather_desc + '\n' + temp_city
        final_data = ('\nCity name: ' + city_name + '\nCity id: ' + city_id +
                      '\n' + date_time + '\nHumidity: ' + humid +
                      '\nWind speed: ' + wind_spd + '\nVisibility: ' + visibility)

        label1.config(text=final_info)
        label2.config(text=final_data)
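The fields read above (cod, weather, main.temp in kelvin, wind.speed, visibility) match the OpenWeatherMap current-weather response, so fetch_data is presumably a thin wrapper around that endpoint. A sketch under that assumption; the requests library and the API-key placeholder are not taken from the original code:

import requests

API_KEY = "YOUR_OPENWEATHERMAP_KEY"  # placeholder, not the original key handling

def fetch_data(city):
    # Query OpenWeatherMap's current-weather endpoint and return the parsed JSON.
    url = "https://api.openweathermap.org/data/2.5/weather"
    response = requests.get(url, params={"q": city, "appid": API_KEY})
    return response.json()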
Example #5
def get_new_data(page="default"):
    print("++++++++++\nIn get_new_data module ...")
    print("+++\nPage is: ", page)
    for api in cfg.config['apis'][page]:
        data = fetch.fetch_data(s_url=api['url'], l_filter=api['filter'])
        raw_posts = parse_feed(data)
        posts = munge_feed(raw_posts)
        # posts = filter_feed(raw_posts)
        db_insert(posts)
        time.sleep(1)
    expire()
Example #6
def get_new_data(s_file_name):
    print("++++++++++\nIn get_new_data module ...")
    # need to select which apis(s) to check
    for api in cfg.config['apis'][s_file_name]:
        data = fetch.fetch_data(s_url=api,
                                l_filter=cfg.config['apis']['filter'])
        raw_posts = parse_feed(data)
        # posts = munge_feed(raw_posts)
        posts = filter_feed(raw_posts)
        db_insert(posts)
        time.sleep(2)
Example #7
def get_api():
    db = {}
    data = fetch.fetch_data(cfg.config['api'])
    # extract parties info
    parties_cp = data['Election']['Leading']['Party']
    parties = []
    party_list = ["LIB", "PC", "NDP", "GRN"]
    clr_list = {"LIB": "red", "PC": "blue", "NDP": "orange", "GRN": "green"}
    for x in parties_cp:
        if x['Name'] in party_list:
            obj = {}
            obj['Name'] = x['Name']
            obj['seats'] = x['Leading'] + x['Elected']
            obj['clr'] = clr_list[obj['Name']]
            parties.append(obj)
    db['parties'] = parties
    # print(data['Election']['Riding'])
    # extract ridings of interest info
    ridings_list = [
        "Hamilton Centre", "Hamilton Eastâ\x80\x94Stoney Creek",
        "Hamilton Mountain", "Hamilton Westâ\x80\x94Ancasterâ\x80\x94Dundas",
        "Flamboroughâ\x80\x94Glanbrook", "Burlington", "Niagara West",
        "Haldimandâ\x80\x94Norfolk", "Oakville Northâ\x80\x94Burlington",
        "Brantfordâ\x80\x94Brant"
    ]
    ridings_cp = [
        x for x in data['Election']['Riding'] if x['RNE'] in ridings_list
    ]
    ridings = []
    for x in ridings_cp:
        obj = {}
        obj['name'] = x['RNE'].replace("â\x80\x94", "–")
        obj['candidates'] = []
        for y in x['Candidate']:
            obj2 = {}
            obj2['name'] = y['FN'] + ' ' + y['LN']
            obj2['party'] = y['PE']
            obj2['votes'] = y['V']
            obj['candidates'].append(obj2)
        ridings.append(obj)
    db['ridings'] = ridings
    db['leaders'] = {
        "ford": "Won riding",
        "horwath": "Won riding",
        "schreiner": "Won riding",
        "wynne": "Won riding"
    }
    with io.open("cp.db", "w+", encoding='utf8') as file:
        file.write(json.dumps(db, ensure_ascii=False))
    return db
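Because the function also writes its result to cp.db as JSON, callers can reload the snapshot without hitting the API again; a minimal sketch of that read path (the helper name is made up):

import io
import json

def load_cached_results(path="cp.db"):
    # Reload the JSON snapshot written by get_api() above.
    with io.open(path, encoding="utf8") as f:
        return json.loads(f.read())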
Example #8
    def test_fetch(self):
        """
        Tests TFL API for fetching data

        Args:
            Self
        
        Returns:
            None
        
        Raises:
            None
        """
        res = fetch_data()
        self.assertIsInstance(res, list)
Example #9
def main():
    data = fetch_data()
    for d in data:
        print("Getting image for", d["EntryId"])
        url_complex = d[config.IMAGE_FIELD]
        try:
            url = url_complex.split("(")[1].split(")")[0]
            ext = url_complex.split("(")[0].split(".")[1]
            name = d["EntryId"]
            path = name + "." + ext
            r = urllib.request.urlopen(url)
            with open(config.PROFILE_PICTURE_FOLDER+"/"+path, 'wb') as f:
                f.write(r.read())
        except Exception:
            print("***** No Image Found *****", d["EntryId"])
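The string surgery on url_complex implies the image field holds something like "name.ext(URL)": the URL is taken from inside the parentheses and the extension from the part before them. A small illustration with a made-up value; the real format depends on the form backend behind config.IMAGE_FIELD:

# Hypothetical field value, shaped to satisfy the parsing above.
url_complex = "headshot.jpg(http://example.com/uploads/headshot.jpg)"
url = url_complex.split("(")[1].split(")")[0]  # -> "http://example.com/uploads/headshot.jpg"
ext = url_complex.split("(")[0].split(".")[1]  # -> "jpg"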
Example #10
    def test_dict(self):
        """
        Tests formatting of response data from TFL API

        Args:
            Self
        
        Returns:
            None
        
        Raises:
            None
        """
        res = fetch_data()
        data = data_for_display(res)
        self.assertIsNotNone(data)
Example #11
def main():
    data = fetch_data()
    counter = 1
    dumper = []
    page = 1
    for d in data:
        profile_photo_file = get_file_name_for_entry(d) #extension
        dumper.append([d[config.NAME_FIELD],
                       d[config.EMAIL_FIELD],
                       d[config.TWITTER_FIELD],
                       d[config.DESCRIPTION_FIELD],
                       profile_photo_file])
        if counter % 5 == 0:
            p = ProfilePage()
            p.run(dumper)
            dumper = []
            p.save(page)
            page += 1
        counter += 1
    p = ProfilePage()
    p.run(dumper)
    dumper = []
    p.save(page)
Example #12
def get_api():
    db = {}
    data = fetch.fetch_data(cfg.config['api'])
    # extract parties info
    parties_cp = data['Election']['Leading']['Party']
    parties = []
    party_list = ["LIB", "PC", "NDP", "GRN"]
    clr_list = {"LIB": "red", "PC": "blue", "NDP": "orange", "GRN": "green"}
    for x in parties_cp:
        if x['Name'] in party_list:
            print(x['Name'])
            obj = {}
            obj['Name'] = x['Name']
            obj['seats'] = x['Leading'] + x['Elected']
            obj['clr'] = clr_list[obj['Name']]
            parties.append(obj)
    db['parties'] = parties
    # print(data['Election']['Riding'])
    # extract ridings of interest info
    ridings_list = [
        "Hamilton Centre", "Hamilton Eastâ\x80\x94Stoney Creek",
        "Hamilton Mountain", "Hamilton Westâ\x80\x94Ancasterâ\x80\x94Dundas",
        "Flamboroughâ\x80\x94Glanbrook"
    ]
    ridings_cp = [
        x for x in data['Election']['Riding'] if x['RNE'] in ridings_list
    ]
    ridings = []
    for x in ridings_cp:
        obj = {}
        obj['name'] = x['RNE'].replace("â\x80\x94", "–")
        obj['candidates'] = []
        for y in x['Candidate']:
            obj2 = {}
            obj2['name'] = y['FN'] + ' ' + y['LN']
            obj2['party'] = y['PE']
            obj2['votes'] = y['V']
            obj['candidates'].append(obj2)
        ridings.append(obj)
    db['ridings'] = ridings
    return db
Example #13
def index():
    data = fetch_data()
    display_data = data_for_display(data)
    write_data(display_data)
    return render_template("index.html", data=display_data)
Example #14
def raw_data():
    data = fetch_data()
    return jsonify(data)
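render_template and jsonify suggest the two views in Examples #13 and #14 belong to a Flask application; a minimal sketch of how they might be wired up, with the URL rules assumed rather than taken from the original project:

from flask import Flask, jsonify, render_template

app = Flask(__name__)

# Register the views defined above; "/" and "/raw" are assumed routes.
app.add_url_rule("/", view_func=index)
app.add_url_rule("/raw", view_func=raw_data)

if __name__ == "__main__":
    app.run(debug=True)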
Example #15
def fetch_hypos(synset):
    data = fetch.fetch_data(HYPO_URL % synset)
    return data.replace("-", "").split("\r\n")
Example #16
def fetch_image_urls(synset):
    data = fetch.fetch_data(MAPPING_URL % synset)
    image_mappings = [y.split() for y in data.split("\r\n") if y]
    return image_mappings
Example #17
#!/usr/bin/env python3

import sys
import warnings

from fetch import fetch_data
from train import train_data

# suppress all warnings
warnings.filterwarnings("ignore")

if __name__ == "__main__":

    fetch_obj = fetch_data(data_type=0)
    test_obj = fetch_data(data_type=1)
    train_obj = train_data(fetch_obj.label_df, fetch_obj.pseudo_df,
                           test_obj.unlabel_df)
    # apply a tf-idf model with SVD
    combine_data = [*train_obj.labeled_data, *train_obj.pseudo_data]
    train_obj.fit_vectorizer(combine_data,
                             min_df=0.010,
                             max_df=0.8,
                             ngram_range=(1, 2),
                             svd=True)
    # using lda
    train_obj.train_model()
    # test data
    train_obj.fit_vectorizer(train_obj.test_data,
                             min_df=0.010,
                             max_df=0.8,
                             ngram_range=(1, 2),
                             svd=True)
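The comment "apply a tf-idf model with SVD" suggests fit_vectorizer builds a TF-IDF matrix and optionally reduces it with truncated SVD; a rough scikit-learn sketch of that idea, reusing the same min_df, max_df, and ngram_range parameters (an assumption, not the original train.train_data implementation):

from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer

def fit_vectorizer(texts, min_df=0.010, max_df=0.8, ngram_range=(1, 2), svd=False):
    # Build the TF-IDF document-term matrix.
    vectorizer = TfidfVectorizer(min_df=min_df, max_df=max_df, ngram_range=ngram_range)
    matrix = vectorizer.fit_transform(texts)
    if svd:
        # Optionally reduce dimensionality with truncated SVD (latent semantic analysis).
        matrix = TruncatedSVD(n_components=100).fit_transform(matrix)
    return matrix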
Example #18
def update():
    """
    Update the database; intended to run asynchronously.
    :return: None
    """
    global update_process
    global update_process_percent

    update_process = "INITIATING"
    update_process_percent = 0.0
    con = sqlite3.connect('essay.db')
    database.init(con)  # initialize the database

    fetch_status, total_essay = fetch.total_essay_number()  # total number of papers currently in the cs.AI category
    if not fetch_status:  # fetching failed: report a server error
        raise Exception("Cannot get the count of total essay number")
    start_offset = (total_essay - 1) // request_max_results * request_max_results  # paging runs from the last page backwards, so compute the starting offset
    last_updated = database.latest_update_time(con)  # timestamp of the most recently updated paper in the database; anything newer has not been inserted yet

    update_process = "GETTING ESSAYS INFO"
    essay_to_insert = []
    pdf_to_fetch = []
    break_flag = False
    for i in range(start_offset, -1, -request_max_results):
        update_process_percent = 1 - (i / total_essay)
        essays = list()  # papers returned for this page
        trail_counter = 0  # retry counter; this endpoint is polled frequently, so several attempts are allowed
        while essays is None or len(essays) == 0:
            if trail_counter >= 5:  # too many failed attempts: give up (server error)
                return
            status, essays = fetch.fetch_data(i, request_max_results)  # try to fetch one page
            trail_counter = trail_counter + 1
        for essay in essays:
            # insert a paper if it is newer than the latest update in the database
            # or not present in the database yet
            if essay["updated"] > last_updated or len(database.query(con, "id", essay["id"])) == 0:
                essay_to_insert.append(essay)
                if pdf_end_time > essay["updated"] >= pdf_start_time:  # published on or after 2020-10-01 and before 2021-01-01: queue its PDF for download
                    pdf_to_fetch.append((essay["pdf"], essay["id"]))
            else:
                break_flag = True  # results run from newest to oldest, so the first paper already in the database means the rest are too
                break
        if break_flag:
            break

    update_process = "INSERT INTO DATABASE"
    database.insert(con, essay_to_insert)  # push the new papers into the database

    if os.path.exists("pdf_list.tmp"):  # load the previously cached list of PDFs still to be fetched
        temp_file = open("pdf_list.tmp")
        pdf_to_fetch.extend(json.loads(temp_file.read()))
        temp_file.close()
    temp_file = open("pdf_list.tmp", "w")  # cache the current list of PDFs to fetch in pdf_list.tmp
    temp_file.write(json.dumps(pdf_to_fetch))
    temp_file.close()

    update_process = "DOWNLOADING PDF"
    count = 1
    for essay in pdf_to_fetch:  # download the PDFs
        update_process_percent = count / len(pdf_to_fetch)
        fetch.download_pdf(essay[0], essay[1])
        count = count + 1

    if os.path.exists("pdf_list.tmp"):  # downloads finished: remove pdf_list.tmp
        os.remove("pdf_list.tmp")
    con.close()
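fetch.fetch_data(offset, max_results) is expected to return a success flag plus a list of paper dictionaries with at least "id", "updated", and "pdf" keys. A rough sketch of such a function against the public arXiv API using feedparser; the field mapping, sort order, and error handling are assumptions, not the original fetch module:

import feedparser

ARXIV_API = "http://export.arxiv.org/api/query"

def fetch_data(offset, max_results):
    # Fetch one page of cs.AI papers and return (ok, papers).
    url = ("%s?search_query=cat:cs.AI&sortBy=submittedDate&sortOrder=descending"
           "&start=%d&max_results=%d" % (ARXIV_API, offset, max_results))
    feed = feedparser.parse(url)
    if feed.bozo or not feed.entries:
        return False, []
    papers = [{
        "id": entry.id,
        "updated": entry.updated,
        "pdf": next((link.href for link in entry.links
                     if link.get("type") == "application/pdf"), None),
    } for entry in feed.entries]
    return True, papers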