コード例 #1
0
ファイル: best_time.py プロジェクト: corallus/algotrading
    def handle(self, *args, **options):
        """Record classifier accuracy for a range of price-reaction delays.

        For every delay of 1, 6, ..., 341 minutes:

        1. ``sentiment`` and ``predicted_sentiment`` are reset on all documents.
        2. Each document without a ``similar`` relation that was published
           inside the time span covered by the share values (shortened by the
           delay at the end) is labelled 'pos', 'neg' or 'neu' by comparing the
           share price just before publication with the price ``delay``
           minutes afterwards.
        3. A Naive Bayes classifier is trained on the first two thirds of the
           labelled documents and scored on the remaining third.
        4. A ``[delay, accuracy]`` row is appended to the workbook, which is
           then saved to ``settings.FILE``.

        Delays for which no document could be labelled produce no row.
        """
        wb = Workbook()
        ws = wb.active
        ws.title = 'first sheet'

        # Nothing in this method writes ShareValue rows, so the time span they
        # cover is fetched once instead of being re-queried for every delay.
        first_value_time = ShareValue.objects.first().time
        last_value_time = ShareValue.objects.last().time

        for delay in range(1, 346, 5):
            # reset data
            Document.objects.update(sentiment=None, predicted_sentiment=None)

            relevant_docs = Document.objects.filter(
                published__gte=first_value_time,
                published__lte=last_value_time - timedelta(minutes=delay),
                similar__isnull=True)

            for document in relevant_docs:
                # last share value in the interval [published - 5, published]
                sharevalue_before = ShareValue.objects.filter(
                    share=document.share,
                    time__lte=document.published,
                    time__gte=document.published - timedelta(minutes=5)).last()

                # first share value in the interval
                # [published + delay, published + delay * 2]
                sharevalue_after = ShareValue.objects.filter(
                    share=document.share,
                    time__gte=document.published + timedelta(minutes=delay),
                    time__lte=document.published + timedelta(minutes=delay * 2)).first()

                # A document stays unlabelled unless a price exists on both
                # sides of its publication time.
                if sharevalue_after and sharevalue_before:
                    price_before = sharevalue_before.price
                    price_after = sharevalue_after.price
                    if price_after > price_before:
                        impact = 'pos'
                    elif price_before > price_after:
                        impact = 'neg'
                    else:
                        impact = 'neu'
                    document.sentiment = impact
                    document.save()

            known_feats = []
            for doc in Document.objects.filter(sentiment__isnull=False, similar__isnull=True):
                text = get_nltktext(doc.text)
                known_feats.append((word_feats(text), doc.sentiment))
            if known_feats:
                known_data_count = len(known_feats)

                # 2/3 training data, taken from the front of the list
                num_training_data = int(round(2 * known_data_count / 3))
                training_feats = known_feats[:num_training_data]
                classifier = NaiveBayesClassifier.train(training_feats)
                classifier.show_most_informative_features()

                # 1/3 test data, taken from the back of the list
                num_testing_data = int(round(known_data_count / 3))
                testing_feats = known_feats[::-1][:num_testing_data]
                accuracy = nltk.classify.util.accuracy(classifier, testing_feats)

                ws.append([delay, accuracy])
                wb.save(settings.FILE)
コード例 #2
0
ファイル: predictions.py プロジェクト: corallus/algotrading
    def handle(self, *args, **options):
        """Label documents by price movement, then predict them one by one.

        The command works with a fixed window of 21 minutes:

        1. ``sentiment`` and ``predicted_sentiment`` are reset on all documents.
        2. Every document published inside the time span covered by the share
           values (shortened by the window at the end) is labelled 'pos',
           'neg' or 'neu' by comparing the last price in
           ``[published - window, published]`` with the first price in
           ``[published + window, published + 2 * window]``.
        3. For each of those documents a Naive Bayes classifier is trained on
           the labelled documents published more than one window earlier, the
           document is classified, and the result is stored in
           ``predicted_sentiment``.  ``calculate_credibility()`` is then
           called, and the predictions of all documents for the same share
           published from one window before the current document onwards are
           combined into a credibility-weighted vote.
        4. A row ``[vote, actual sentiment, vote == actual sentiment]`` is
           appended to the workbook, which is saved to ``settings.FILE``.

        Documents with no usable training data produce no row.
        """
        wb = Workbook()
        ws = wb.active
        ws.title = 'first sheet'

        window = 21  # minutes

        # reset data
        Document.objects.update(sentiment=None, predicted_sentiment=None)

        # Nothing in this method writes ShareValue rows, so the time span they
        # cover is fetched once and shared by both passes.
        first_value_time = ShareValue.objects.first().time
        last_value_time = ShareValue.objects.last().time
        relevant_docs = Document.objects.filter(
            published__gte=first_value_time,
            published__lte=last_value_time - timedelta(minutes=window))

        for d in relevant_docs:
            # last share value in the interval [d.published - window, d.published]
            sharevalue_before = ShareValue.objects.filter(
                share=d.share,
                time__lte=d.published,
                time__gte=d.published - timedelta(minutes=window)).last()
            # first share value in the interval
            # [d.published + window, d.published + window * 2]
            sharevalue_after = ShareValue.objects.filter(
                share=d.share,
                time__gte=d.published + timedelta(minutes=window),
                time__lte=d.published + timedelta(minutes=window * 2)).first()

            # A document stays unlabelled (sentiment None) unless a price
            # exists on both sides of its publication time.
            if sharevalue_after and sharevalue_before:
                price_before = sharevalue_before.price
                price_after = sharevalue_after.price
                if price_after > price_before:
                    impact = 'pos'
                elif price_before > price_after:
                    impact = 'neg'
                else:
                    impact = 'neu'
                d.sentiment = impact
                d.save()

        # .all() yields a fresh, unevaluated copy of the queryset, so the
        # second pass re-reads the documents from the database.
        for document in relevant_docs.all():
            # Train only on documents that actually received a label above;
            # unlabelled ones would otherwise enter the classifier as a
            # spurious ``None`` class.
            training_docs = relevant_docs.filter(
                published__lt=document.published - timedelta(minutes=window),
                sentiment__isnull=False)
            known_feats = [
                (word_feats(get_nltktext(training_doc.text)), training_doc.sentiment)
                for training_doc in training_docs
            ]

            if known_feats:
                classifier = NaiveBayesClassifier.train(known_feats)
                classifier.show_most_informative_features()

                # classify current document
                text = get_nltktext(document.text)
                document.predicted_sentiment = classifier.classify(word_feats(text))
                document.save()

                calculate_credibility()

                # Credibility-weighted vote: 'pos' predictions add their
                # credibility, 'neg' predictions subtract it, anything else
                # does not count.
                prediction = 0
                for doc in Document.objects.filter(
                        share=document.share,
                        published__gte=document.published - timedelta(minutes=window),
                        predicted_sentiment__isnull=False):
                    if doc.predicted_sentiment == 'pos':
                        prediction += doc.credibility
                    elif doc.predicted_sentiment == 'neg':
                        prediction -= doc.credibility

                if prediction > 0:
                    result = 'pos'
                elif prediction < 0:
                    result = 'neg'
                else:
                    result = 'neu'

                ws.append([result, document.sentiment, result == document.sentiment])
                wb.save(settings.FILE)