def handle(self, *args, **options):
    """Record classifier accuracy for a sweep of price-reaction horizons.

    For each horizon in ``range(1, 346, 5)`` minutes (1, 6, ..., 341):

    1. ``sentiment`` and ``predicted_sentiment`` are cleared on all documents.
    2. Every document with ``similar`` null that was published inside the
       span covered by share values is labelled 'pos', 'neg' or 'neu' by
       comparing the last share value in the 5 minutes up to publication
       with the first share value between ``horizon`` and ``2 * horizon``
       minutes after publication. Documents lacking either value stay
       unlabelled.
    3. A NaiveBayesClassifier is trained on the first two thirds of the
       labelled documents and scored on the last third.
    4. ``[horizon, accuracy]`` is appended to the sheet.

    The workbook is finally written to ``settings.FILE``. ``args`` and
    ``options`` are accepted but not used. Returns ``None``.
    """
    wb = Workbook()
    ws = wb.active
    ws.title = 'first sheet'

    # Nothing in this method writes share values, so the bounds of the
    # share-value table are fetched once instead of once per horizon.
    first_value = ShareValue.objects.first()
    last_value = ShareValue.objects.last()
    if first_value is None or last_value is None:
        # first()/last() yield None when there are no rows. Without price
        # data nothing can be labelled: clear stale labels and write an
        # empty sheet rather than failing on ``None.time``.
        Document.objects.update(sentiment=None, predicted_sentiment=None)
        wb.save(settings.FILE)
        return
    earliest = first_value.time
    latest = last_value.time

    # ``horizon`` rather than ``time`` to avoid shadowing the stdlib module
    # name.
    for horizon in range(1, 346, 5):
        delay = timedelta(minutes=horizon)

        # reset data
        Document.objects.update(sentiment=None, predicted_sentiment=None)

        relevant_docs = Document.objects.filter(
            published__gte=earliest,
            # Leave room after publication for the "after" price lookup.
            published__lte=latest - delay,
            similar__isnull=True)

        for document in relevant_docs:
            # last share value in the interval
            # [published - 5 min, published]
            sharevalue_before = ShareValue.objects.filter(
                share=document.share,
                time__lte=document.published,
                time__gte=document.published - timedelta(minutes=5)).last()
            # first share value in the interval
            # [published + horizon, published + horizon * 2]
            sharevalue_after = ShareValue.objects.filter(
                share=document.share,
                time__gte=document.published + delay,
                time__lte=document.published + delay * 2).first()

            if not (sharevalue_after and sharevalue_before):
                # No price pair available: the document stays unlabelled.
                continue

            price_before = sharevalue_before.price
            price_after = sharevalue_after.price
            if price_after > price_before:
                impact = 'pos'
            elif price_before > price_after:
                impact = 'neg'
            else:
                impact = 'neu'
            document.sentiment = impact
            document.save()

        known_feats = [
            (word_feats(get_nltktext(doc.text)), doc.sentiment)
            for doc in Document.objects.filter(sentiment__isnull=False,
                                               similar__isnull=True)
        ]
        if not known_feats:
            # Nothing was labelled for this horizon, so there is no row.
            continue

        known_data_count = len(known_feats)

        # 2/3 training data, taken from the front of the list
        num_training_data = int(round(2 * known_data_count / 3))
        training_feats = known_feats[:num_training_data]
        classifier = NaiveBayesClassifier.train(training_feats)
        classifier.show_most_informative_features()

        # 1/3 test data, taken from the back of the list
        num_testing_data = int(round(known_data_count / 3))
        testing_feats = known_feats[::-1][:num_testing_data]
        accuracy = nltk.classify.util.accuracy(classifier, testing_feats)

        ws.append([horizon, accuracy])

    wb.save(settings.FILE)
def handle(self, *args, **options):
    """Evaluate rolling sentiment predictions with a 21-minute horizon.

    Steps:

    1. Clear ``sentiment`` and ``predicted_sentiment`` on all documents.
    2. Label every document published inside the span covered by share
       values as 'pos', 'neg' or 'neu' by comparing the last share value in
       the 21 minutes up to publication with the first share value between
       21 and 42 minutes after publication. Documents lacking either value
       stay unlabelled.
    3. For each document, train a NaiveBayesClassifier on the labelled
       documents published more than 21 minutes earlier, classify the
       document, store the result in ``predicted_sentiment`` and call
       ``calculate_credibility()``.
    4. Combine the predictions of documents for the same share into a
       credibility-weighted vote and append
       ``[vote, document.sentiment, vote == document.sentiment]`` to the
       sheet.

    The workbook is finally written to ``settings.FILE``. ``args`` and
    ``options`` are accepted but not used. Returns ``None``.
    """
    wb = Workbook()
    ws = wb.active
    ws.title = 'first sheet'
    horizon = timedelta(minutes=21)

    # reset data
    Document.objects.update(sentiment=None, predicted_sentiment=None)

    first_value = ShareValue.objects.first()
    last_value = ShareValue.objects.last()
    if first_value is None or last_value is None:
        # first()/last() yield None when there are no rows. Without price
        # data nothing can be labelled or evaluated, so write an empty sheet
        # rather than failing on ``None.time``.
        wb.save(settings.FILE)
        return

    in_range = {
        'published__gte': first_value.time,
        # Leave room after publication for the "after" price lookup.
        'published__lte': last_value.time - horizon,
    }

    for d in Document.objects.filter(**in_range):
        # last share value in the interval [d.published - horizon, d.published]
        sharevalue_before = ShareValue.objects.filter(
            share=d.share,
            time__lte=d.published,
            time__gte=d.published - horizon).last()
        # first share value in the interval
        # [d.published + horizon, d.published + horizon * 2]
        sharevalue_after = ShareValue.objects.filter(
            share=d.share,
            time__gte=d.published + horizon,
            time__lte=d.published + horizon * 2).first()

        if not (sharevalue_after and sharevalue_before):
            # No price pair available: the document stays unlabelled.
            continue

        price_before = sharevalue_before.price
        price_after = sharevalue_after.price
        if price_after > price_before:
            impact = 'pos'
        elif price_before > price_after:
            impact = 'neg'
        else:
            impact = 'neu'
        d.sentiment = impact
        d.save()

    # Fresh queryset so the labels written above are read back from the
    # database.
    relevant_docs = Document.objects.filter(**in_range)
    for document in relevant_docs:
        # Only documents that actually received a label may be used for
        # training; otherwise ``None`` would be learned as a class.
        training_docs = relevant_docs.filter(
            published__lt=document.published - horizon,
            sentiment__isnull=False)
        known_feats = [
            (word_feats(get_nltktext(train_doc.text)), train_doc.sentiment)
            for train_doc in training_docs
        ]
        if not known_feats:
            # NOTE(review): documents without any training data are skipped
            # entirely (no prediction, no sheet row) - confirm this matches
            # the intended nesting of the original code.
            continue

        classifier = NaiveBayesClassifier.train(known_feats)
        classifier.show_most_informative_features()

        # classify current document
        text = get_nltktext(document.text)
        document.predicted_sentiment = classifier.classify(word_feats(text))
        document.save()

        calculate_credibility()

        # Credibility-weighted vote over predicted documents of the same
        # share: 'pos' adds the credibility, 'neg' subtracts it.
        # NOTE(review): the query has no upper bound on ``published``, so
        # already-predicted documents published later also vote - confirm
        # whether that is intended.
        prediction = 0
        voters = Document.objects.filter(
            share=document.share,
            published__gte=document.published - horizon,
            predicted_sentiment__isnull=False)
        for doc in voters:
            if doc.predicted_sentiment == 'pos':
                prediction += doc.credibility
            elif doc.predicted_sentiment == 'neg':
                prediction -= doc.credibility

        if prediction > 0:
            result = 'pos'
        elif prediction < 0:
            result = 'neg'
        else:
            result = 'neu'

        ws.append([result, document.sentiment, result == document.sentiment])

    wb.save(settings.FILE)