def __iter__(self):
    """Yield one sentiment record per date in ``self.dates``.

    For each date, all article token strings for that day are joined,
    re-tokenized, and each distinct word is scored against the first
    SentiWordNet synset that matches it.  Dates with no articles are
    skipped entirely (no record is yielded for them).

    Yields:
        dict with keys ``date``, ``length`` (token count for the day),
        ``n_sentiment`` and ``p_sentiment`` (weighted negative/positive
        score sums, normalised — see note below).
    """
    length = len(self.dates)
    for index, date in enumerate(self.dates, start=1):
        progress(index, length, status="{}".format(date))
        neg_day_count = 0
        pos_day_count = 0
        day_articles = self.df.loc[self.df['date_written'] == date]['tokens']
        if day_articles.empty:
            continue
        tokens = word_tokenize(" ".join(day_articles))
        tokens.sort()
        # Word -> occurrence count for the day.
        token_dict = {}
        for word in tokens:
            token_dict[word] = token_dict.get(word, 0) + 1
        for word, count in token_dict.items():
            # BUG FIX: the original wrapped this in a bare ``except:`` which
            # silently swallowed *every* error.  The only expected failure is
            # a word with no SentiWordNet synsets, so test that explicitly
            # and let genuine errors propagate.
            synsets = list(swn.senti_synsets(word))
            if not synsets:
                continue
            synset = synsets[0]
            neg_day_count += synset.neg_score() * count
            pos_day_count += synset.pos_score() * count
        # NOTE(review): scores are normalised by the number of dates
        # (``length``), not by the day's token count — looks suspicious,
        # but preserved as-is; confirm whether ``len(tokens)`` was intended.
        yield {'date': date,
               'length': len(tokens),
               'n_sentiment': neg_day_count / length,
               'p_sentiment': pos_day_count / length}
def tokenize(start, end):
    """Tokenize, clean and lemmatize every Article between two dates.

    For each Article written in ``[start, end)`` (dates as ``YYYY-MM-DD``
    strings), the contents are tokenized, lower-cased, filtered to
    alphabetic non-stopword tokens, lemmatized, filtered against the
    stopword list again, and the result is stored space-joined on
    ``art.tokens`` and saved.

    Args:
        start: inclusive start date string, ``%Y-%m-%d``.
        end: end date string, ``%Y-%m-%d``.
    """
    # A set gives O(1) membership tests; the stopword list is consulted
    # twice per token inside the hot loop.
    sw = set(stopwords.words('english'))
    lem = WordNetLemmatizer()
    d0 = datetime.strptime(start, "%Y-%m-%d")
    d1 = datetime.strptime(end, "%Y-%m-%d")
    length = (d1 - d0).days
    print(length)
    index = 0
    for date in daterange(start, end):
        arts = Article.objects.filter(date_written=date)
        index += 1
        for art in arts:
            progress(index, length, status="{}".format(art.date_written))
            # BUG FIX: the token accumulator is now reset for every article.
            # Previously a single list was shared across the whole run, so
            # each article was saved with the tokens of *all* previously
            # processed articles prepended to its own.
            kept = []
            for token in word_tokenize(art.contents):
                token = token.lower()
                if not token.isalpha():
                    continue
                if token in sw:
                    continue
                lemma = lem.lemmatize(token)
                # The lemma may itself be a stopword (e.g. plural forms).
                if lemma in sw:
                    continue
                kept.append(lemma)
            art.tokens = " ".join(kept)
            art.save()
def create_objects(self):
    """Create a StockPrice object for every row of ``self.df``.

    Rows whose date already appears in ``self.exist_stocks`` are skipped.
    The date is saved in a second ``save()`` so a malformed date can be
    reported with the offending value.

    Raises:
        Exception: if assigning/saving the parsed date fails.
    """
    df_len = len(self.df.index)
    for index, row in self.df.iterrows():
        existing = None
        if not self.exist_stocks.empty:
            existing = self.exist_stocks.loc[self.exist_stocks['date'] == row[self.date]]
        # BUG FIX: the original tested ``if not stockprice:`` on a pandas
        # DataFrame, which raises ``ValueError: The truth value of a
        # DataFrame is ambiguous`` whenever exist_stocks is non-empty.
        # Emptiness must be tested via ``.empty``.
        if existing is None or existing.empty:
            progress(index, df_len, "Creating stock price object ...")
            curday = parser.parse(row[self.date]).date()
            # Inter-day volatility: log10 return of close over open, in percent.
            interday = math.log(row[self.close] / row[self.open], 10) * 100
            stockprice = StockPrice(
                asset=self.asset,
                open=row[self.open],
                close=row[self.close],
                high=row[self.high],
                low=row[self.low],
                adj_close=row[self.adj_close] if self.adj_included else row[self.close],
                # NOTE(review): ``not row[...]`` only catches 0/None/"" —
                # NaN volumes pass straight through; confirm that is intended.
                volume=0.0 if not row[self.volume] else row[self.volume],
                interday_volatility=interday,
            )
            stockprice.save()
            try:
                stockprice.date = curday
                stockprice.save()
            except Exception:
                # Surface the offending date before re-raising so the bad
                # row can be located in the input file.
                print(curday)
                raise Exception("Found date error")
        else:
            progress(index, df_len, "Stock price object exists ...")
def delete_view(request, pk):
    """Delete the Label ``pk`` together with all of its Columns and their
    Values, reporting progress per column, then redirect to 'models'."""
    label = Label.objects.get(id=pk)
    columns = Column.objects.filter(label=pk)
    for column in columns:
        values = Value.objects.filter(column=column.id)
        total = len(values)
        # Delete each value individually so progress can be displayed.
        for position, value in enumerate(values):
            progress(position, total, "{}".format(column.name))
            value.delete()
        column.delete()
    label.delete()
    return redirect('models')
def create_objects(self, add_contents=True):
    """Create an Article object for each row of ``self.df`` not already
    present in ``self.exist_arts`` (matched on headline).

    New articles get a Source (created on demand, with a numeric country
    code: 0 = Ireland, 1 = UK/England, None otherwise), optional contents,
    and a ``date_written`` set after the first save.  Existing articles
    only have their contents refreshed (when ``add_contents``) and are
    re-saved.

    Args:
        add_contents: when True, attach the row's ``content`` via
            ``self.add_contents``.
    """
    total = len(self.df.index)
    for idx, row in self.df.iterrows():
        headline = row['headline']
        matches = self.exist_arts.loc[self.exist_arts['headline'] == headline]
        if not matches.empty:
            # Already imported: optionally refresh contents, then re-save.
            article = Article.objects.filter(headline=headline).first()
            if add_contents:
                self.add_contents(article, row['content'])
            article.save()
            progress(idx, total, "Article {}/{} exists ...".format(idx, total))
            continue
        progress(idx, total, "Parsing article {}/{}... ".format(idx, total))
        source = Source.objects.filter(name=row['source']).first()
        if not source:
            raw_country = row['country']
            if 'IRELAND' in raw_country:
                country_code = 0
            elif 'UNITED KINGDOM' in raw_country or 'ENGLAND' in raw_country:
                country_code = 1
            else:
                # Unrecognised country: log it and store no code.
                print(row['country'])
                country_code = None
            source = Source(
                name=row['source'],
                type=row['publication_type'],
                country=country_code
            )
            source.save()
        written = datetime.strftime(parser.parse(row['date']), '%Y-%m-%d')
        # The length column reads like "123 words"; keep only the number.
        word_count = int(row['length'].split("words")[0])
        article = Article(
            headline=headline,
            length=word_count,
            source=source,
        )
        article.save()
        if add_contents:
            self.add_contents(article, row['content'])
        # Date is assigned after the initial save, mirroring the original
        # two-step save sequence.
        article.date_written = written
        article.save()
def __iter__(self):
    """Yield one keyword-count record per date in ``self.dates``.

    For each date, the day's article token strings are joined and
    re-tokenized, and the occurrences of every word in ``self.words``
    are summed.  Dates with no articles yield nothing.

    Yields:
        dict with ``date``, ``count`` (keyword hits), ``length``
        (token count for the day) and ``sentiment`` (hits divided by
        the total number of dates).
    """
    total_dates = len(self.dates)
    position = 0
    for date in self.dates:
        position += 1
        daily = self.df.loc[self.df['date_written'] == date]['tokens']
        if daily.empty:
            continue
        tokens = word_tokenize(" ".join(daily))
        tokens.sort()
        # Word -> occurrence count for the day.
        counts = {}
        for tok in tokens:
            counts[tok] = counts.get(tok, 0) + 1
        hits = sum(c for w, c in counts.items() if w in self.words)
        progress(position, total_dates,
                 status="{}: {}".format(date, hits / len(tokens)))
        yield {'date': date,
               'count': hits,
               'length': len(tokens),
               'sentiment': hits / total_dates}