Code example #1
Score: 0
File: iterators.py — Project: katyalait/safas
    def __iter__(self):
        """Yield per-date sentiment aggregates for the dates in self.dates.

        For each date, collects the tokens of all articles written that day,
        counts word frequencies, and sums SentiWordNet positive/negative
        scores weighted by frequency.

        Yields:
            dict with keys 'date', 'length' (token count for the day),
            'n_sentiment' and 'p_sentiment' (frequency-weighted score sums
            divided by the number of dates).
        """
        from collections import Counter

        length = len(self.dates)
        for index, date in enumerate(self.dates, start=1):
            progress(index, length, status="{}".format(date))
            day_articles = self.df.loc[self.df['date_written'] == date]['tokens']
            if day_articles.empty:
                continue
            tokens = word_tokenize(" ".join(day_articles))
            # Sorted so the float accumulation order matches the original
            # (sorted-dict) iteration order.
            tokens.sort()
            token_dict = Counter(tokens)
            neg_day_count = 0
            pos_day_count = 0
            for word, count in token_dict.items():
                try:
                    # First synset for the word. IndexError: SentiWordNet has
                    # no entry; LookupError: other NLTK lookup failures.
                    # (Was a bare `except:` that hid every error.)
                    synset = list(swn.senti_synsets(word))[0]
                except (IndexError, LookupError):
                    continue
                neg_day_count += synset.neg_score() * count
                pos_day_count += synset.pos_score() * count

            # NOTE(review): scores are normalised by the number of dates
            # (`length`), not by len(tokens) — preserved as-is; confirm the
            # intended denominator.
            yield {'date': date, 'length': len(tokens),
                   'n_sentiment': neg_day_count / length,
                   'p_sentiment': pos_day_count / length}
Code example #2
Score: 0
def tokenize(start, end):
    """Tokenize, lower-case, stopword-filter and lemmatize every article
    written between `start` and `end` ("%Y-%m-%d" strings), storing the
    resulting space-joined tokens on each article's `tokens` field.

    Bug fixed: the token buffer was created once for the whole run, so each
    article's saved tokens also contained the tokens of every previously
    processed article. The buffer is now reset per article.
    """
    sw = set(stopwords.words('english'))  # set: O(1) membership tests
    lem = WordNetLemmatizer()
    d0 = datetime.strptime(start, "%Y-%m-%d")
    d1 = datetime.strptime(end, "%Y-%m-%d")
    length = (d1 - d0).days
    print(length)
    index = 0
    for date in daterange(start, end):
        arts = Article.objects.filter(date_written=date)
        index += 1
        for art in arts:
            progress(index, length, status="{}".format(art.date_written))
            kept = []  # per-article buffer (was shared across all articles)
            for t in word_tokenize(art.contents):
                t = t.lower()
                # Drop non-alphabetic tokens and stopwords (both before and
                # after lemmatization, as in the original logic).
                if not t.isalpha() or t in sw:
                    continue
                lemma = lem.lemmatize(t)
                if lemma not in sw:
                    kept.append(lemma)
            art.tokens = " ".join(kept)
            art.save()
Code example #3
Score: 0
    def create_objects(self):
        """Create a StockPrice row for every dataframe row whose date is not
        already present in self.exist_stocks.

        Bug fixed: `if not stockprice:` on a pandas DataFrame raises
        ValueError ("truth value of a DataFrame is ambiguous") whenever
        exist_stocks is non-empty; we now test `.empty` explicitly.
        """
        df_len = len(self.df.index)

        for index, row in self.df.iterrows():
            existing = None
            if not self.exist_stocks.empty:
                existing = self.exist_stocks.loc[self.exist_stocks['date'] == row[self.date]]
            if existing is None or existing.empty:
                progress(index, df_len, "Creating stock price object ...")
                curday = parser.parse(row[self.date]).date()
                # log10 of close/open ratio, expressed in percent.
                interday = math.log(row[self.close] / row[self.open], 10) * 100
                stockprice = StockPrice(asset=self.asset, open=row[self.open],
                                        close=row[self.close], high=row[self.high],
                                        low=row[self.low],
                                        adj_close=row[self.adj_close] if self.adj_included else row[self.close],
                                        volume=0.0 if not row[self.volume] else row[self.volume],
                                        interday_volatility=interday)
                stockprice.save()
                try:
                    # Date is assigned after the initial save; errors here are
                    # reported with the offending date, then re-raised.
                    stockprice.date = curday
                    stockprice.save()
                except Exception:
                    print(curday)
                    raise Exception("Found date error")
            else:
                progress(index, df_len, "Stock price object exists ...")
Code example #4
Score: 0
def delete_view(request, pk):
    """Delete the Label with id `pk`, cascading manually through all of its
    columns and their values, then redirect to the models page."""
    label = Label.objects.get(id=pk)
    for column in Column.objects.filter(label=pk):
        values = Value.objects.filter(column=column.id)
        total = len(values)
        # Report progress per value while deleting them one by one.
        for position, value in enumerate(values):
            progress(position, total, "{}".format(column.name))
            value.delete()
        column.delete()
    label.delete()
    return redirect('models')
Code example #5
Score: 0
 def create_objects(self, add_contents=True):
     """Create an Article (and, if missing, its Source) for every dataframe
     row whose headline is not already present in self.exist_arts.

     When add_contents is True, the article body is attached via
     self.add_contents before the final save.
     """
     articles_len = len(self.df.index)
     for index, row in self.df.iterrows():
         match = self.exist_arts.loc[self.exist_arts['headline'] == row['headline']]
         if not match.empty:
             # Already imported: refresh its contents and report progress.
             existing = Article.objects.filter(headline=row['headline']).first()
             if add_contents:
                 self.add_contents(existing, row['content'])
             existing.save()
             progress(index, articles_len, "Article {}/{} exists ...".format(index, articles_len))
             continue
         progress(index, articles_len, "Parsing article {}/{}... ".format(index, articles_len))
         source = Source.objects.filter(name=row['source']).first()
         if not source:
             # Map the raw country string to a numeric code; unknown
             # countries are printed and stored as None.
             raw_country = row['country']
             if 'IRELAND' in raw_country:
                 code = 0
             elif 'UNITED KINGDOM' in raw_country or 'ENGLAND' in raw_country:
                 code = 1
             else:
                 print(row['country'])
                 code = None
             source = Source(
                 name=row['source'],
                 type=row['publication_type'],
                 country=code
             )
             source.save()
         date = datetime.strftime(parser.parse(row['date']), '%Y-%m-%d')
         article = Article(
             headline=row['headline'],
             length=int(row['length'].split("words")[0]),
             source=source,
         )
         article.save()
         if add_contents:
             self.add_contents(article, row['content'])
         # date_written is set after the initial save, as in the original.
         article.date_written = date
         article.save()
Code example #6
Score: 0
File: iterators.py — Project: katyalait/safas
    def __iter__(self):
        """Yield, for each date in self.dates, how often the tracked words
        (self.words) appear in that day's article tokens.

        Yields:
            dict with keys 'date', 'count' (tracked-word occurrences),
            'length' (token count for the day), and 'sentiment'
            (count divided by the number of dates).
        """
        from collections import Counter

        length = len(self.dates)
        for index, date in enumerate(self.dates, start=1):
            day_articles = self.df.loc[self.df['date_written'] == date]['tokens']
            if day_articles.empty:
                continue
            tokens = word_tokenize(" ".join(day_articles))
            tokens.sort()
            counts = Counter(tokens)
            # Total occurrences of the tracked vocabulary on this day.
            day_count = sum(c for word, c in counts.items() if word in self.words)
            progress(index, length, status="{}: {}".format(date, day_count / len(tokens)))

            # NOTE(review): 'sentiment' is normalised by the number of dates
            # (`length`), while the progress line above divides by
            # len(tokens) — preserved as-is; confirm which is intended.
            yield {'date': date, 'count': day_count, 'length': len(tokens),
                   'sentiment': day_count / length}