Code Example #1
File: finnhub.py Project: ludde127/Trader_2.1
    def on_message(self, message):
        """Get message, add to dataframes and each 15 updates save it to drive"""

        mess = ast.literal_eval(message)
        if mess["type"] == "trade":
            print(message)
            data = mess["data"][0]
            rel_sym = data["s"]
            price = data["p"]
            time = data["t"]
            volume = data["v"]
            info_dict = {
                "time": float(time),
                "price": float(price),
                "volume": float(volume)
            }
            self.dataframe_dict[rel_sym] = self.dataframe_dict[rel_sym].append(
                info_dict, ignore_index=True)
            if len(self.dataframe_dict[rel_sym].index.values) % 357 == 0:
                print(self.dataframe_dict[rel_sym])
                # Time is correct but does not show //
                # print(f"first_time: {self.dataframe_dict[rel_sym]['time'].values[0]}, last: {self.dataframe_dict[rel_sym]['time'].values[-1]}")
                file = files["streamed_data_finnhub"]
                update_hdf5(file,
                            key=rel_sym,
                            dataframe=self.dataframe_dict[rel_sym])
                self.dataframe_dict[rel_sym] = self.dataframe_dict[
                    rel_sym].iloc[0:0]
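
All of these examples funnel their dataframes into the project's update_hdf5 helper, whose implementation is not shown on this page. A minimal sketch consistent with the call sites, assuming it simply wraps pandas' DataFrame.to_hdf (the signature is inferred from usage, not taken from the project), might look like this:

import pandas as pd

def update_hdf5(file, key, dataframe, mode="a", append=True):
    # Write or append the dataframe to the HDF5 store under the given key.
    # format="table" is required for append=True to work with to_hdf.
    dataframe.to_hdf(file, key=key, mode=mode, append=append, format="table")
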
Code Example #2
File: processing.py Project: ludde127/Trader_2.1
def make_all_timestamp():
    dold = read_hdf5(files["news_store"],
                     "twitter_conversations").reset_index()
    #dold["created_date"] = dold["created_date"].apply(date_to_posix)
    dold = dold.dropna().sort_values(by="created_date")
    print(dold)
    print(dold.loc[dold.index.values[0]])
    update_hdf5(files["news_store"],
                "twitter_conversations",
                dataframe=dold.reset_index(),
                append=False)
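
read_hdf5 is the matching project helper for loading a key back out of the store; it is likewise not shown here. Assuming it wraps pandas.read_hdf, a counterpart sketch could be:

import pandas as pd

def read_hdf5(file, key):
    # Load the DataFrame stored under `key` from the HDF5 file.
    return pd.read_hdf(file, key=key)
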
Code Example #3
File: news.py Project: ludde127/Trader_2.1
    def parse_article(self, url, save_to_self=True):
        art = Article(url)
        art.download()
        art.parse()
        dic_temp = {
            "link": str(art.url),
            "text": str(art.text),
            "title": str(art.title),
            "created": str(art.publish_date),
            "keywords": str(art.keywords),
            "author": str(art.authors)
        }
        df = self.df_art_base.append(dic_temp, ignore_index=True)
        if art.url not in self.urls["gotten"]:
            update_hdf5(files["news_store"], "news_articles", dataframe=df)
        if save_to_self:
            self.df_art = self.df_art.append(dic_temp, ignore_index=True)
        else:
            return dic_temp
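
The Article object here comes from the newspaper3k library. A standalone version of the same download/parse flow, with a placeholder URL, looks roughly like the snippet below; note that art.keywords stays empty unless nlp() is also called, which the method above omits:

from newspaper import Article

art = Article("https://example.com/some-article")  # placeholder URL
art.download()
art.parse()
art.nlp()  # populates art.keywords and art.summary
print(art.title, art.publish_date, art.keywords, art.authors)
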
Code Example #4
File: news.py Project: ludde127/Trader_2.1
    def scrape_subreddit(self,
                         added_subreddits=(),
                         items=10,
                         return_items=False,
                         only_added=False):
        if only_added:
            subreddits_ = added_subreddits
        else:
            subreddits_ = subreddits(added=added_subreddits)
        if items is None:
            items = 50
        subreddits_gotten = []
        for subreddit in subreddits_:
            hot_posts = self.red.subreddit(subreddit).hot(limit=items)
            subreddits_gotten.append((hot_posts, subreddit))
        comments = {}
        for sub in subreddits_gotten:
            comments[sub] = self.parse_submission_obj(sub)
        try:
            with open(files["reddit_comments"], "rb") as f:
                old = pickle.load(f)
        except (FileNotFoundError, EOFError) as e:
            old = {}
            error(e)
        with open(files["reddit_comments"], "wb") as f:
            if old is not None:
                pickle.dump(comments.update(old), f)
            else:
                pickle.dump(comments, f)
        print(self.df_subs)
        update_hdf5(files["news_store"],
                    "reddit",
                    dataframe=self.df_subs,
                    mode="a",
                    append=False)

        if return_items:
            return self.df_subs, comments
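
self.red in this method is assumed to be a PRAW client. A minimal standalone equivalent of the fetch loop, with placeholder credentials, might be:

import praw

# Placeholder credentials; the project presumably configures these elsewhere.
red = praw.Reddit(client_id="...", client_secret="...", user_agent="Trader_2.1 scraper")
for post in red.subreddit("stocks").hot(limit=10):
    print(post.title, post.score, post.num_comments)
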
Code Example #5
File: processing.py Project: ludde127/Trader_2.1
def to_backup(file, key, append=True):
    backup_df = read_hdf5(file, key)
    print(backup_df)
    update_hdf5(files["backup"], key, append=append, dataframe=backup_df)
Code Example #6
File: processing.py Project: ludde127/Trader_2.1
def dicts_to_df(dicts):
    # Build one column per tag returned by return_tags(), then append each
    # interval's tag-count dict as a row.
    columns_list = []
    for t in return_tags():
        columns_list.extend(t)
    df = pd.DataFrame(columns=columns_list)
    for d in dicts:
        print(d)
        df = df.append(d, ignore_index=True).fillna(0)
    print(df)
    return df


if __name__ == "__main__":
    sleep(1000)
    get_news_tags()
    tag = read_hdf5(files["tags_df"], "twitter").AMD.values
    comb = read_hdf5(files["tags_df"], "twitter").tags_combined.values

    for t, c in zip(tag, comb):
        print(t / c)

    sleep(400)
    pprint(return_tags())
    dfs = split_hourly(read_hdf5(files["news_store"], "twitter_conversations"))
    tags_dicts = []
    for df in dfs:
        d = tags_from_df(df, title=False)
        tags_dicts.append(d)
    df_interval = dicts_to_df(tags_dicts)
    update_hdf5(files["tags_df"],
                "twitter",
                dataframe=df_interval,
                append=False)
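
These snippets rely on DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0. On current pandas the row-by-row accumulation in dicts_to_df can be built in one step; a rough equivalent, assuming every key in the dicts is covered by return_tags():

import pandas as pd

def dicts_to_df(dicts):
    # pandas >= 2.0: construct the frame directly instead of appending per dict.
    columns_list = []
    for t in return_tags():
        columns_list.extend(t)
    return pd.DataFrame(dicts, columns=columns_list).fillna(0)
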
Code Example #7
File: news.py Project: ludde127/Trader_2.1
    def get_conversations(self, since_id=0, tags=(), items_batch=25):

        if not tags:
            tags = [t for t in twitter_tags() if len(t) > 1]
        log(f"Getting raw data for {tags}, since_id={since_id}")
        search_str = ""
        try:
            data = read_hdf5(files["news_store"], key="twitter_conversations")
            ids = [int(i) for i in data.id.values]
            self.ids.extend(ids)
            since_id = max(ids)
        except Exception as e:
            since_id = 1252311068950638593
            error(e)
        if not since_id:
            try:
                ids = read_hdf5(self.store_dir,
                                "twitter_conversations")["id"].values
                ids_ = True
            except (AttributeError, KeyError, TypeError) as e:
                error(str(e))
                ids_ = False
            if not ids_:
                since_id = 0
            else:
                since_id = max(ids)
        df = pd.DataFrame(columns=[
            "created", "id", "retweets", "text", "user_id", "favorits",
            "user_followers"
        ])
        tags = [tag for tag in tags if isinstance(tag, str)]

        for t in tags[:40]:
            search_str += f"{t} OR "
        if search_str[-3:] == "OR ":
            search_str = search_str[:-4]
        search_str += " -filter:retweets"
        print(search_str)

        for i in range(5):
            try:
                since_id = max(self.ids)
                if since_id:
                    cursor = tweepy.Cursor(self.api.search,
                                           q=search_str,
                                           lang="en",
                                           full_text=True,
                                           since_id=since_id,
                                           wait_on_rate_limit=True,
                                           tweet_mode="extended",
                                           wait_on_rate_limit_notify=True,
                                           retry_delay=5).items(items_batch)
                else:
                    cursor = tweepy.Cursor(self.api.search,
                                           q=search_str,
                                           lang="en",
                                           full_text=True,
                                           wait_on_rate_limit=True,
                                           tweet_mode="extended",
                                           wait_on_rate_limit_notify=True,
                                           retry_delay=5).items(items_batch)
                for tweet in cursor:
                    j = tweet._json
                    if j["id"] in self.ids:
                        print(str(j["id"]) + " already in wtf")
                    else:
                        created = date_to_posix(str(j["created_at"]),
                                                list=False)

                        if created is not None:
                            #print(j["created_at"])
                            #date = datetime.datetime.strptime(j["created_at"], "%a %b %H %M %S %z %-y").replace(tzinfo=timezone.utc).timestamp()
                            #print(date)

                            data = {
                                "created": float(created),
                                "id": str(j["id"]),
                                "retweets": str(j["retweet_count"]),
                                "text": str(j["full_text"]),
                                "user_id": str(j["user"]["id"]),
                                "favorits": str(j["favorite_count"]),
                                "user_followers":
                                str(j["user"]["followers_count"])
                            }
                            #tag_str(j["full_text"], as_str=True)

                            self.ids.append(int(j["id"]))
                            if len(data["text"]) >= 333:
                                print(data["text"], "left out")
                            else:
                                if len(data["text"]) > 25:
                                    df = df.append(data, ignore_index=True)
                        else:
                            print(j)

                df.set_index("created", inplace=True)
                print(df)

                self.ids.extend([int(v) for v in df.id.values])
                update_hdf5(files["news_store"],
                            key="twitter_conversations",
                            dataframe=df,
                            append=True)
                df = pd.DataFrame(columns=[
                    "created", "id", "retweets", "text", "user_id", "favorits",
                    "user_followers"
                ])

            except FileNotFoundError as e:
                error(str(e))
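
The Cursor call above targets the tweepy v3 API: api.search was renamed to api.search_tweets in tweepy 4.0, and wait_on_rate_limit is normally set on the API object rather than passed through the cursor. A minimal standalone version of the same search pattern, with placeholder credentials, might look like:

import tweepy

# Placeholder credentials; the project wires these up in its own auth setup.
auth = tweepy.OAuthHandler("CONSUMER_KEY", "CONSUMER_SECRET")
auth.set_access_token("ACCESS_TOKEN", "ACCESS_SECRET")
api = tweepy.API(auth, wait_on_rate_limit=True)

query = "AMD OR NVDA -filter:retweets"  # hypothetical search string
for tweet in tweepy.Cursor(api.search, q=query, lang="en",
                           tweet_mode="extended").items(25):
    print(tweet.id, tweet.full_text[:80])
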
Code Example #8
File: news.py Project: ludde127/Trader_2.1
    def gather_different(self,
                         extra_urls=None,
                         only_extra=False,
                         ignore_gotten=False,
                         save=True):
        checklang = False
        if extra_urls:
            self.urls["extras"] = set(extra_urls)
            for url_ext in extra_urls:
                mine_article(url_ext)
        if not only_extra:
            print(self.newssites)
            if len(self.newssites) > 1 and type(self.newssites) is list:
                papers = [
                    build(paper, config=self.config)
                    for paper in self.newssites
                ]
            else:
                papers = build(self.newssites, config=self.config)
            log(f"Getting Data from {len(self.newssites)} newssites...")
            news_pool.set(papers, threads_per_source=2)
            news_pool.join()
            for art_pool, url in zip(papers, self.newssites):
                print(
                    f"Handling newssite {int(self.newssites.index(url)) + 1}/{len(self.newssites)}"
                )
                for art in art_pool.articles:
                    art.parse()
                    if (str(art.url)
                            not in self.urls["gotten"]) or ignore_gotten:
                        created = date_to_posix(dates=art.publish_date,
                                                list=False)
                        if created is not None and created != "None":
                            dic_temp = {
                                "link": str(art.url),
                                "text": str(art.text.replace("  ", "").replace("\n", "")),
                                "title": str(art.title),
                                "created": float(created),
                                "keywords": str(art.keywords),
                                "author": str(art.authors)
                            }
                            self.urls["gotten"] = np.append(
                                self.urls["gotten"], art.url)
                            if checklang:
                                try:
                                    if check_lang_is_en(str(art.text)):
                                        self.df_art = self.df_art.append(
                                            dic_temp, ignore_index=True)
                                    else:
                                        print(f"Blocked: {dic_temp['text']}")
                                except json.decoder.JSONDecodeError as e:
                                    error(e)
                                    if check_lang_is_en(str(art.title)):
                                        self.df_art = self.df_art.append(
                                            dic_temp, ignore_index=True)
                                    else:
                                        print(f"Blocked: {dic_temp['text']}")
                                    print("fixed?")
                            else:
                                self.df_art = self.df_art.append(
                                    dic_temp, ignore_index=True)

        if save:
            print(self.df_art)
            try:
                pass
                #print(self.df_art.to_string())
            except:
                pass
            update_hdf5(files["news_store"],
                        "news_articles",
                        dataframe=self.df_art,
                        mode="a",
                        append=False)
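
The multi-source download here uses newspaper3k's build and news_pool. A pared-down version of that flow, with placeholder site URLs, could look like:

from newspaper import build, news_pool

sites = ["https://www.reuters.com", "https://www.cnbc.com"]  # placeholder sources
papers = [build(url, memoize_articles=False) for url in sites]

# Download all sources concurrently, then parse each article for text/metadata.
news_pool.set(papers, threads_per_source=2)
news_pool.join()

for paper in papers:
    for art in paper.articles:
        art.parse()
        print(art.url, art.title)
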