Beispiel #1
0
    def makeMonthlyAppearanceChart(self, entities, fromDate, toDate):
        """Render a grouped bar chart of monthly appearance counts per entity.

        For every ``(entityId, entity)`` pair one bar trace is built from the
        ``(month, count)`` tuples returned by
        ``selectCountAssociationsForEntityBetweenDates``.  The figure is
        exported to ``appearances.png`` through plotly and then opened with
        ``Image.show()``.

        Args:
            entities: iterable of ``(entityId, entity)`` pairs; ``entity`` is
                used as the trace name.
            fromDate: start of the period, forwarded to the query helper.
            toDate: end of the period, forwarded to the query helper.
        """
        traces = []

        for (entityId, entity) in entities:
            monthlyApp = self.selectCountAssociationsForEntityBetweenDates(
                entityId, fromDate, toDate)

            traces.append(Bar(
                x=[self.monthYearLabel(month) for (month, _) in monthlyApp],
                y=[count for (_, count) in monthlyApp],
                name=entity,
                marker=Marker(
                    color='rgb(204,204,204)',
                    opacity=0.5,
                ),
            ))

        layout = Layout(
            xaxis=XAxis(
                # draw the x-axis labels at a 45 degree angle
                tickangle=-45,
            ),
            barmode='group',
        )
        fig = Figure(data=Data(traces), layout=layout)
        # Export the complete figure so that the layout (tick angle and
        # grouped bar mode) is part of the saved image, not just the traces.
        py.image.save_as(fig, "appearances.png")
        img = Image.open("appearances.png")
        img.show()
Beispiel #2
0
    def makeMonthlyPolarityChart(self, entities, fromDate, toDate):
        """Render a grouped bar chart of a monthly polarity percentage per entity.

        For every ``(entityId, entity)`` pair one bar trace is built from the
        ``(month, n, rows)`` tuples returned by
        ``selectAllPolaritiesForEntity``.  The bar height for a month is
        ``rows.count(1) / (n + 1) * 100``.  The figure is exported to
        ``polarities.png`` through plotly and then opened with
        ``Image.show()``.

        Args:
            entities: iterable of ``(entityId, entity)`` pairs; ``entity`` is
                used as the trace name.
            fromDate: start of the period, forwarded to the query helper.
            toDate: end of the period, forwarded to the query helper.
        """
        traces = []

        for (entityId, entity) in entities:
            monthlyPol = self.selectAllPolaritiesForEntity(entityId, fromDate, toDate)

            traces.append(Bar(
                x=[self.monthYearLabel(month) for (month, _, _) in monthlyPol],
                # NOTE(review): n is presumably the number of polarity rows of
                # the month and the "+ 1" guards against division by zero --
                # confirm against selectAllPolaritiesForEntity.
                y=[(0.0 + rows.count(1)) / (n + 1) * 100 for (_, n, rows) in monthlyPol],
                name=entity,
                marker=Marker(
                    color='rgb(204,204,204)',
                    opacity=0.5,
                ),
            ))

        layout = Layout(
            xaxis=XAxis(
                # draw the x-axis labels at a 45 degree angle
                tickangle=-45,
            ),
            barmode='group',
        )
        fig = Figure(data=Data(traces), layout=layout)
        # Export the complete figure so that the layout (tick angle and
        # grouped bar mode) is part of the saved image, not just the traces.
        py.image.save_as(fig, "polarities.png")
        img = Image.open("polarities.png")
        img.show()
Beispiel #3
0
    def doAssociationsForEntity(self, entity):
        """Insert the missing entity/article associations for ``entity``.

        Every article whose content contains ``entity`` as a substring (SQL
        ``LIKE``) and that has no row in ``assocEntityArticle`` for this
        entity yet receives one.  When at least one row is inserted the
        change is committed and ``articleInfoUpdate`` is emitted; otherwise
        nothing is written and no signal is sent.

        Args:
            entity: entity name; resolved to its id via ``selectEntityId``.
        """
        dbCursor = mysql_conn.cursor()

        # resolve the entity name to its id
        entityId = self.selectEntityId(entity)

        # article ids that are already associated with this entity
        knownQuery = """SELECT article_id
                        FROM assocEntityArticle
                        WHERE entity_id=%s"""
        dbCursor.execute(knownQuery, (entityId,))
        associatedIds = set(record[0] for record in dbCursor.fetchall())

        # article ids whose content mentions the entity
        matchQuery = """SELECT article_id
                        FROM articles
                        WHERE content LIKE %s"""
        dbCursor.execute(matchQuery, ("%" + entity + "%",))
        mentioningIds = set(record[0] for record in dbCursor.fetchall())

        # mentioned but not yet associated
        missingIds = list(mentioningIds - associatedIds)
        if not missingIds:
            return

        insertStmt = """INSERT INTO assocEntityArticle (article_id, entity_id)
                            VALUES (%s, %s)"""
        newRows = [(articleId, entityId) for articleId in missingIds]
        dbCursor.executemany(insertStmt, newRows)
        dbCursor.execute("""COMMIT""")

        self.articleInfoUpdate.emit()
Beispiel #4
0
    def classifyEntitiesInArticle(self):
        """Predict a polarity for each entity of the article that has none shown.

        For every index ``i`` whose ``calculatedClassLabels[i]`` text is blank,
        a multinomial naive Bayes classifier is trained on the content of the
        articles that carry a manual polarity for that entity, and applied to
        ``self.articleContent``.

        NOTE(review): within the visible lines ``predicted`` is computed but
        never used or stored -- the method may be truncated here; confirm
        against the full file.
        """
        cursor = mysql_conn.cursor()

        for i in xrange(len(self.entities)):
            # only entities whose calculated-class label is still empty
            if len(self.calculatedClassLabels[i].text().strip()) == 0:
                entityId, entity = self.entities[i]
                manualPol = self.selectManualPolaritiesForEntity(entityId)
                # column 4 of the article row is used as the training text
                trainingData = [
                    self.selectArticle(id_)[4] for (id_, _) in manualPol
                ]
                trainingTarget = [polarity for (_, polarity) in manualPol]

                # term counts -> TF-IDF features, fitted on the training texts
                countVect = CountVectorizer()
                trainingDataCounts = countVect.fit_transform(trainingData)
                tfidfTransformer = TfidfTransformer()
                trainingTfidf = tfidfTransformer.fit_transform(
                    trainingDataCounts)

                clf = MultinomialNB().fit(trainingTfidf, trainingTarget)

                # the current article goes through the already fitted transformers
                testData = [self.articleContent]
                testDataCounts = countVect.transform(testData)
                testTfidf = tfidfTransformer.transform(testDataCounts)

                predicted = clf.predict(testTfidf)
Beispiel #5
0
    def doAllAssociations(self):
        """Create the missing article associations for every known entity.

        Runs ``doAssociationsForEntity`` once per entity returned by
        ``selectAllEntities`` and emits ``articleInfoUpdate`` afterwards.
        """
        # selectAllEntities returns whole rows (entity_id, entity), while
        # doAssociationsForEntity expects only the entity name, which it
        # concatenates into a LIKE pattern.
        for (_, entity) in self.selectAllEntities():
            self.doAssociationsForEntity(entity)

        self.articleInfoUpdate.emit()
Beispiel #6
0
    def selectAllEntities(self):
        """Return every row of the ``entities`` table.

        Returns:
            The ``fetchall()`` result of ``SELECT * FROM entities``.
        """
        fetchQuery = """SELECT *
                        FROM entities"""
        dbCursor = mysql_conn.cursor()
        dbCursor.execute(fetchQuery)
        return dbCursor.fetchall()
Beispiel #7
0
    def selectCountArticles(self):
        """Return the number of rows in the ``articles`` table."""
        countQuery = """SELECT count(*)
                        FROM articles"""
        dbCursor = mysql_conn.cursor()
        dbCursor.execute(countQuery)
        # single result row; its first column holds the count
        return dbCursor.fetchone()[0]
Beispiel #8
0
    def selectCountAssociations(self):
        """Return the number of rows in the ``assocEntityArticle`` table."""
        countQuery = """SELECT count(*)
                        FROM assocEntityArticle"""
        dbCursor = mysql_conn.cursor()
        dbCursor.execute(countQuery)
        # single result row; its first column holds the count
        return dbCursor.fetchone()[0]
Beispiel #9
0
    def deleteAllAuthors(self):
        """Delete all authors and restart the table's auto-increment at 1.

        Executes the delete, the ``ALTER TABLE`` and a ``COMMIT`` in that
        order on one cursor.
        """
        dbCursor = mysql_conn.cursor()
        for statement in (
                """DELETE FROM authors
                        WHERE author_id > 0""",
                """ALTER TABLE authors AUTO_INCREMENT = 1""",
                """COMMIT"""):
            dbCursor.execute(statement)
Beispiel #10
0
    def selectCountArticles(self):
        """Return how many articles are stored in the ``articles`` table."""
        dbCursor = mysql_conn.cursor()
        countQuery = """SELECT count(*)
                        FROM articles"""
        dbCursor.execute(countQuery)
        countRow = dbCursor.fetchone()
        # the count is the only column of the only row
        return countRow[0]
Beispiel #11
0
    def selectCountEntities(self):
        """Return the number of rows in the ``entities`` table."""
        countQuery = """SELECT count(*)
                        FROM entities"""
        dbCursor = mysql_conn.cursor()
        dbCursor.execute(countQuery)
        # single result row; its first column holds the count
        return dbCursor.fetchone()[0]
Beispiel #12
0
    def deleteAllAssociations(self):
        """Delete every entity/article association and notify listeners.

        The delete is committed, then ``articleInfoUpdate`` is emitted.
        """
        dbCursor = mysql_conn.cursor()
        for statement in (
                """DELETE FROM assocEntityArticle
                        WHERE article_id > 0""",
                """COMMIT"""):
            dbCursor.execute(statement)

        self.articleInfoUpdate.emit()
Beispiel #13
0
    def selectMinAndMaxDate(self):
        """Return the earliest and latest article date.

        Returns:
            The single result row ``(MIN(date), MAX(date))`` over ``articles``.
        """
        rangeQuery = """SELECT MIN(date), MAX(date)
                        FROM articles"""
        dbCursor = mysql_conn.cursor()
        dbCursor.execute(rangeQuery)
        return dbCursor.fetchone()
Beispiel #14
0
    def selectPolarityForEntityInArticle(self, articleId, entityId):
        """Fetch the stored polarities of one entity/article association.

        Args:
            articleId: value matched against ``article_id``.
            entityId: value matched against ``entity_id``.

        Returns:
            The first matching row ``(polarity_manual, polarity_calculated)``
            as returned by ``fetchone()``.
        """
        lookupStmt = """SELECT polarity_manual, polarity_calculated
                        FROM assocEntityArticle
                        WHERE article_id=%s AND entity_id=%s"""
        dbCursor = mysql_conn.cursor()
        dbCursor.execute(lookupStmt, (articleId, entityId))
        return dbCursor.fetchone()
Beispiel #15
0
    def selectPolarityForEntityInArticle(self, articleId, entityId):
        """Look up manual and calculated polarity for an entity in an article.

        Args:
            articleId: value matched against ``article_id``.
            entityId: value matched against ``entity_id``.

        Returns:
            The ``fetchone()`` result of the lookup, i.e. a row
            ``(polarity_manual, polarity_calculated)``.
        """
        dbCursor = mysql_conn.cursor()
        lookupStmt = """SELECT polarity_manual, polarity_calculated
                        FROM assocEntityArticle
                        WHERE article_id=%s AND entity_id=%s"""
        keys = (articleId, entityId)
        dbCursor.execute(lookupStmt, keys)
        polarityRow = dbCursor.fetchone()
        return polarityRow
Beispiel #16
0
    def selectCountClassifiedAssociations(self):
        """Count associations that have a calculated or a manual polarity."""
        countQuery = """SELECT count(*)
                        FROM assocEntityArticle
                        WHERE polarity_calculated IS NOT NULL
                        OR polarity_manual IS NOT NULL"""
        dbCursor = mysql_conn.cursor()
        dbCursor.execute(countQuery)
        # single result row; its first column holds the count
        return dbCursor.fetchone()[0]
Beispiel #17
0
    def selectAllArticlesByEntity(self, entity):
        """Return all article rows whose content contains ``entity``.

        Args:
            entity: text that is wrapped in ``%`` wildcards and matched with
                SQL ``LIKE`` against ``articles.content``.

        Returns:
            The ``fetchall()`` result of the query.
        """
        matchQuery = """SELECT *
                        FROM articles
                        WHERE content LIKE %s"""
        dbCursor = mysql_conn.cursor()
        dbCursor.execute(matchQuery, ("%" + entity + "%",))
        return dbCursor.fetchall()
Beispiel #18
0
    def selectEntityId(self, entity):
        """Return the ``entity_id`` stored for the entity name ``entity``.

        Args:
            entity: value matched against ``entities.entity``.

        Returns:
            First column of the first matching row.

        Raises:
            TypeError: if no row matches, because ``fetchone()`` then
                returns ``None``.
        """
        lookupStmt = """SELECT entity_id
                FROM entities
                WHERE entity=%s"""
        dbCursor = mysql_conn.cursor()
        dbCursor.execute(lookupStmt, (entity,))
        return dbCursor.fetchone()[0]
Beispiel #19
0
    def process_item(self, item, spider):
        """Store a scraped article together with its author reference.

        The author id is looked up with ``selectAuthor``; if that yields
        ``-1`` the author is created through ``insertAuthor``.  The resulting
        id is written to ``item['idAuth']`` as an ``int`` before the article
        is handed to ``insertArticle``.

        Args:
            item: mapping with at least an ``'auth'`` key; gains ``'idAuth'``.
            spider: not used by this method.

        Returns:
            The same ``item`` object.
        """
        dbCursor = mysql_conn.cursor()
        authorId = selectAuthor(dbCursor, item['auth'])

        # presumably -1 means "author not stored yet" -- confirm in selectAuthor
        if authorId == -1:
            authorId = insertAuthor(dbCursor, item['auth'])
        item['idAuth'] = int(authorId)

        insertArticle(dbCursor, item)
        return item
Beispiel #20
0
    def selectAuthor(self, id):
        """Return the second column of the ``authors`` row with the given id.

        Args:
            id: value matched against ``authors.author_id``.

        Returns:
            Column index 1 of the matching row (presumably the author's
            name -- confirm against the table schema).

        Raises:
            TypeError: if no row matches, because ``fetchone()`` then
                returns ``None``.
        """
        fetchQuery = """SELECT *
                        FROM authors
                        WHERE author_id=%s"""
        dbCursor = mysql_conn.cursor()
        dbCursor.execute(fetchQuery, (id,))
        return dbCursor.fetchone()[1]
Beispiel #21
0
    def selectAuthor(self, id):
        """Look up an author row by id and return its second column.

        Args:
            id: value matched against ``authors.author_id``.

        Returns:
            Column index 1 of the matching row (presumably the author's
            name -- confirm against the table schema).

        Raises:
            TypeError: if no row matches, because ``fetchone()`` then
                returns ``None``.
        """
        dbCursor = mysql_conn.cursor()
        fetchQuery = """SELECT *
                        FROM authors
                        WHERE author_id=%s"""
        dbCursor.execute(fetchQuery, (id, ))
        authorRow = dbCursor.fetchone()
        return authorRow[1]
Beispiel #22
0
    def selectArticle(self, articleId):
        """Return the ``articles`` row with the given id.

        Args:
            articleId: value matched against ``articles.article_id``.

        Returns:
            The ``fetchone()`` result of the lookup.
        """
        fetchQuery = """SELECT *
                        FROM articles
                        WHERE article_id=%s"""
        dbCursor = mysql_conn.cursor()
        dbCursor.execute(fetchQuery, (articleId,))
        return dbCursor.fetchone()
Beispiel #23
0
    def clearAllCalculatedPolarities(self):
        """Reset every calculated polarity to NULL and notify listeners.

        The update is committed, then ``articleInfoUpdate`` is emitted.
        """
        resetQuery = """UPDATE assocEntityArticle
                        SET polarity_calculated=%s
                        WHERE polarity_calculated IS NOT NULL"""
        dbCursor = mysql_conn.cursor()
        # None is passed as the parameter so the column is set to NULL
        dbCursor.execute(resetQuery, (None,))
        dbCursor.execute("""COMMIT""")

        self.articleInfoUpdate.emit()
Beispiel #24
0
    def process_item(self, item, spider):
        """Persist a scraped item after resolving its author id.

        ``selectAuthor`` is asked for the id of ``item['auth']``; a result of
        ``-1`` triggers ``insertAuthor``.  Either way the id is stored as an
        ``int`` under ``item['idAuth']`` and the item is passed to
        ``insertArticle``.

        Args:
            item: mapping with at least an ``'auth'`` key; gains ``'idAuth'``.
            spider: not used by this method.

        Returns:
            The same ``item`` object.
        """
        dbCursor = mysql_conn.cursor()
        existingId = selectAuthor(dbCursor, item['auth'])

        # presumably -1 means "author not stored yet" -- confirm in selectAuthor
        needsInsert = existingId == -1
        item['idAuth'] = (int(insertAuthor(dbCursor, item['auth']))
                          if needsInsert else int(existingId))

        insertArticle(dbCursor, item)
        return item
Beispiel #25
0
    def selectManualPolaritiesForEntity(self, entityId):
        """Return the manually assigned polarities of an entity.

        Args:
            entityId: value matched against ``entity_id``.

        Returns:
            ``fetchall()`` result of ``(article_id, polarity_manual)`` rows,
            limited to associations whose manual polarity is not NULL.
        """
        manualStmt = """SELECT article_id, polarity_manual
                        FROM assocEntityArticle
                        WHERE polarity_manual IS NOT NULL
                        AND entity_id=%s"""
        dbCursor = mysql_conn.cursor()
        dbCursor.execute(manualStmt, (entityId,))
        return dbCursor.fetchall()
Beispiel #26
0
    def selectManualPolaritiesForEntity(self, entityId):
        """Fetch all ``(article_id, polarity_manual)`` pairs of an entity.

        Only associations with a non-NULL manual polarity are returned.

        Args:
            entityId: value matched against ``entity_id``.

        Returns:
            The ``fetchall()`` result of the query.
        """
        dbCursor = mysql_conn.cursor()
        manualStmt = """SELECT article_id, polarity_manual
                        FROM assocEntityArticle
                        WHERE polarity_manual IS NOT NULL
                        AND entity_id=%s"""
        params = (entityId,)
        dbCursor.execute(manualStmt, params)
        labelled = dbCursor.fetchall()
        return labelled
Beispiel #27
0
    def deleteAllEntities(self):
        """Delete all entities, restart the auto-increment and notify listeners.

        Executes the delete, the ``ALTER TABLE`` and a ``COMMIT`` in that
        order, then emits ``articleInfoUpdate`` followed by ``entityUpdate``.
        """
        dbCursor = mysql_conn.cursor()
        for statement in (
                """DELETE FROM entities
                        WHERE entity_id > 0""",
                """ALTER TABLE entities AUTO_INCREMENT = 1""",
                """COMMIT"""):
            dbCursor.execute(statement)

        for signal in (self.articleInfoUpdate, self.entityUpdate):
            signal.emit()
Beispiel #28
0
    def clearClassification(self):
        """Reset the calculated polarity of every entity of the current article.

        For each entry of ``self.entities`` the association with
        ``self.articleId`` gets ``polarity_calculated`` set to NULL; every
        update is committed on its own.  Afterwards the parent widget's
        ``articleInfoUpdate`` is emitted and the entity box is refreshed.
        """
        dbCursor = mysql_conn.cursor()
        resetCalcQuery = """UPDATE assocEntityArticle
                            SET polarity_calculated=%s
                            WHERE article_id=%s AND entity_id=%s"""

        for entityRow in self.entities:
            # first element of an entity entry is its id
            dbCursor.execute(resetCalcQuery, (None, self.articleId, entityRow[0]))
            dbCursor.execute("""COMMIT""")

        self.parentW.articleInfoUpdate.emit()
        self.updateEntityBox()
Beispiel #29
0
    def clearClassification(self):
        """Clear the calculated polarity for all entities of this article.

        Each entry of ``self.entities`` triggers one committed update that
        sets ``polarity_calculated`` to NULL for the pair
        ``(self.articleId, entity id)``.  Finally the parent widget's
        ``articleInfoUpdate`` is emitted and ``updateEntityBox`` is called.
        """
        dbCursor = mysql_conn.cursor()
        resetCalcQuery = """UPDATE assocEntityArticle
                            SET polarity_calculated=%s
                            WHERE article_id=%s AND entity_id=%s"""

        for entityRow in self.entities:
            params = (None, self.articleId, entityRow[0])
            dbCursor.execute(resetCalcQuery, params)
            dbCursor.execute("""COMMIT""")

        self.parentW.articleInfoUpdate.emit()
        self.updateEntityBox()
Beispiel #30
0
    def deleteAllArticles(self):
        """Delete all articles and restart the table's auto-increment at 1.

        On success the change is committed and ``articleInfoUpdate`` is
        emitted.  An ``IntegrityError`` raised anywhere in that sequence is
        swallowed, so the call is best effort.
        """
        try:
            dbCursor = mysql_conn.cursor()
            for statement in (
                    """DELETE FROM articles
                            WHERE article_id > 0""",
                    """ALTER TABLE articles AUTO_INCREMENT = 1""",
                    """COMMIT"""):
                dbCursor.execute(statement)

            self.articleInfoUpdate.emit()
        except IntegrityError:
            # deliberately ignored; presumably raised while other rows still
            # reference articles -- confirm against the schema
            pass
Beispiel #31
0
    def selectArticle(self, articleId):
        """Load an article row and cache its fields on the instance.

        Columns 1, 2 and 4 of the row are stored as ``articleTitle``,
        ``articleDate`` and ``articleContent``; column 3 is resolved through
        ``selectAuthor`` and stored as ``articleAuthor``.

        Args:
            articleId: value matched against ``articles.article_id``.

        Returns:
            The fetched row.
        """
        fetchQuery = """SELECT *
                        FROM articles
                        WHERE article_id=%s"""
        dbCursor = mysql_conn.cursor()
        dbCursor.execute(fetchQuery, (articleId, ))
        article = dbCursor.fetchone()

        self.articleTitle = article[1]
        self.articleDate = article[2]
        # column 3 holds the author id; store what selectAuthor returns for it
        self.articleAuthor = self.selectAuthor(article[3])
        self.articleContent = article[4]

        return article
Beispiel #32
0
    def selectArticle(self, articleId):
        """Fetch one article and remember title, date, author and content.

        The instance attributes ``articleTitle``, ``articleDate``,
        ``articleAuthor`` (via ``selectAuthor`` on column 3) and
        ``articleContent`` are overwritten from columns 1-4 of the row.

        Args:
            articleId: value matched against ``articles.article_id``.

        Returns:
            The fetched row.
        """
        dbCursor = mysql_conn.cursor()
        fetchQuery = """SELECT *
                        FROM articles
                        WHERE article_id=%s"""
        dbCursor.execute(fetchQuery, (articleId,))
        article = dbCursor.fetchone()

        self.articleTitle = article[1]
        self.articleDate = article[2]
        # column 3 holds the author id; store what selectAuthor returns for it
        self.articleAuthor = self.selectAuthor(article[3])
        self.articleContent = article[4]

        return article
Beispiel #33
0
    def deleteAssciationsForEntity(self, entity):
        """Delete every article association of the entity named ``entity``.

        The name is resolved to its ``entity_id`` first; the delete is
        committed and ``articleInfoUpdate`` is emitted.

        Args:
            entity: value matched against ``entities.entity``.

        Raises:
            TypeError: if no entity with that name exists, because
                ``fetchone()`` then returns ``None``.
        """
        dbCursor = mysql_conn.cursor()

        # resolve the entity name to its id
        lookupStmt = """SELECT entity_id
                        FROM entities
                        WHERE entity=%s"""
        dbCursor.execute(lookupStmt, (entity,))
        entityId = dbCursor.fetchone()[0]

        # drop its associations and make the change permanent
        purgeQuery = """DELETE FROM assocEntityArticle
                        WHERE entity_id=%s"""
        dbCursor.execute(purgeQuery, (entityId,))
        dbCursor.execute("""COMMIT""")

        self.articleInfoUpdate.emit()
Beispiel #34
0
    def classifyAllAssociations(self):
        """Predict and store polarities for all unclassified associations.

        For each entity a text classifier (term counts -> TF-IDF -> the
        estimator selected in ``algorithmComboBox``) is trained on the
        content of the articles whose association carries a manual polarity.
        It is then applied to the entity's articles that have neither a
        manual nor a calculated polarity, and the predictions are written to
        ``polarity_calculated`` and committed.  ``articleInfoUpdate`` is
        emitted after each entity for which predictions were stored.

        Entities without any manually labelled article are skipped: there is
        nothing to train on, and fitting on an empty corpus would raise and
        abort the classification of all remaining entities.
        """
        cursor = mysql_conn.cursor()

        # the selected algorithm and the statements are the same for every entity
        algorithm = self.algorithmComboBox.currentText()
        # articles associated with the entity that still need a polarity
        selectStmt = """SELECT article_id
                        FROM assocEntityArticle
                        WHERE polarity_manual IS NULL
                        AND polarity_calculated IS NULL
                        AND entity_id=%s"""
        updateStmt = """UPDATE assocEntityArticle
                        SET polarity_calculated=%s
                        WHERE entity_id=%s AND article_id=%s"""

        for (entityId, entity) in self.selectAllEntities():
            manualPol = self.selectManualPolaritiesForEntity(entityId)
            if not manualPol:
                # no training data for this entity
                continue

            # column 4 of an article row is its content
            trainingData = [self.selectArticle(id_)[4] for (id_, _) in manualPol]
            trainingTarget = [polarity for (_, polarity) in manualPol]

            textClf = Pipeline([('vect', CountVectorizer()),
                                ('tfidf', TfidfTransformer()),
                                ('clf', classifiers[algorithm]),
                                ])
            textClf.fit(trainingData, trainingTarget)

            cursor.execute(selectStmt, (entityId,))
            ids = [row[0] for row in cursor.fetchall()]
            if not ids:
                continue

            testData = [self.selectArticle(id_)[4] for id_ in ids]
            predicted = textClf.predict(testData)
            # diagnostic output: number of articles predicted with polarity 1
            print(list(predicted).count(1))

            updateData = [(polarity, entityId, id_)
                          for (polarity, id_) in zip(predicted, ids)]
            cursor.executemany(updateStmt, updateData)
            cursor.execute("""COMMIT""")

            self.articleInfoUpdate.emit()
Beispiel #35
0
    def selectCountAssociationsForEntityBetweenDates(self, entityId, fromDate, toDate):
        """Count an entity's article associations month by month.

        Args:
            entityId: value matched against ``assocEntityArticle.entity_id``.
            fromDate: start of the period, forwarded to ``monthsBetweenDates``.
            toDate: end of the period, forwarded to ``monthsBetweenDates``.

        Returns:
            A list of ``(month, count)`` tuples, one per month yielded by
            ``monthsBetweenDates(fromDate, toDate)`` and in that order; empty
            when there are no months.
        """
        cursor = mysql_conn.cursor()

        # Half-open window [month, month + 1 month): an inclusive BETWEEN
        # would count an article dated exactly on the upper bound both for
        # this month and (as lower bound) for the following one.
        selectStmt = """SELECT count(*)
                        FROM assocEntityArticle a, articles b
                        WHERE a.article_id = b.article_id
                        AND b.date >= %s AND b.date < %s
                        AND a.entity_id=%s"""
        associations = []
        for month in self.monthsBetweenDates(fromDate, toDate):
            data = (self.getStringDate(month),
                    self.getStringDate(month.addMonths(1)),
                    entityId)
            cursor.execute(selectStmt, data)
            associations.append((month, cursor.fetchone()[0]))

        return associations
Beispiel #36
0
    def removeEntity(self):
        """Delete the entities currently selected in ``entityList``.

        For each selected item its associations are removed through
        ``deleteAssciationsForEntity``, then the entity row itself is deleted
        and committed, and ``entityUpdate`` is emitted.
        """
        selected = self.entityList.selectedItems()
        dbCursor = mysql_conn.cursor()

        entityIdLookup = """SELECT entity_id
                            FROM entities
                            WHERE entity=%s"""
        entityDeletion = """DELETE FROM entities
                            WHERE entity_id=%s"""

        for item in selected:
            # associations first, then the entity row itself
            self.deleteAssciationsForEntity(item.text())

            dbCursor.execute(entityIdLookup, (item.text(),))
            idRow = dbCursor.fetchone()

            dbCursor.execute(entityDeletion, (idRow[0],))
            dbCursor.execute("""COMMIT""")

            self.entityUpdate.emit()
Beispiel #37
0
    def selectEntitiesInArticle(self):
        """Return the entities associated with the current article.

        Returns:
            A list of ``(entity_id, entity)`` tuples, one per association row
            of ``self.articleId``, in the order the ids were fetched.
        """
        dbCursor = mysql_conn.cursor()

        # ids of all entities linked to the article
        idListStmt = """SELECT entity_id
                        FROM assocEntityArticle
                        WHERE article_id=%s"""
        dbCursor.execute(idListStmt, (self.articleId,))
        entityIds = [record[0] for record in dbCursor.fetchall()]

        # one name lookup per id
        nameLookup = """SELECT entity
                        FROM entities
                        WHERE entity_id=%s"""
        pairs = []
        for entityId in entityIds:
            dbCursor.execute(nameLookup, (entityId,))
            pairs.append((entityId, dbCursor.fetchone()[0]))

        return pairs
Beispiel #38
0
    def selectEntitiesInArticle(self):
        """Collect ``(entity_id, entity)`` pairs for the current article.

        The entity ids linked to ``self.articleId`` are fetched first; each
        id is then resolved to its name with a separate query.

        Returns:
            A list of ``(entity_id, entity)`` tuples in fetch order.
        """
        dbCursor = mysql_conn.cursor()

        idListStmt = """SELECT entity_id
                        FROM assocEntityArticle
                        WHERE article_id=%s"""
        dbCursor.execute(idListStmt, (self.articleId, ))
        entityIds = [record[0] for record in dbCursor.fetchall()]

        nameLookup = """SELECT entity
                        FROM entities
                        WHERE entity_id=%s"""
        pairs = []
        for entityId in entityIds:
            dbCursor.execute(nameLookup, (entityId, ))
            nameRow = dbCursor.fetchone()
            pairs.append((entityId, nameRow[0]))

        return pairs
Beispiel #39
0
    def classifyEntitiesInArticle(self):
        """Predict a polarity for each entity of the article that has none shown.

        For every index ``i`` whose ``calculatedClassLabels[i]`` text is blank,
        a multinomial naive Bayes classifier is trained on the content of the
        articles that carry a manual polarity for that entity, and applied to
        ``self.articleContent``.

        NOTE(review): within the visible lines ``predicted`` is computed but
        never used or stored -- the method may continue beyond this excerpt;
        confirm against the full file.
        """
        cursor = mysql_conn.cursor()

        for i in xrange(len(self.entities)):
            # only entities whose calculated-class label is still empty
            if len(self.calculatedClassLabels[i].text().strip()) == 0:
                entityId, entity = self.entities[i]
                manualPol = self.selectManualPolaritiesForEntity(entityId)
                # column 4 of the article row is used as the training text
                trainingData = [self.selectArticle(id_)[4] for (id_, _) in manualPol]
                trainingTarget = [polarity for (_, polarity) in manualPol]

                # term counts -> TF-IDF features, fitted on the training texts
                countVect = CountVectorizer()
                trainingDataCounts = countVect.fit_transform(trainingData)
                tfidfTransformer = TfidfTransformer()
                trainingTfidf = tfidfTransformer.fit_transform(trainingDataCounts)

                clf = MultinomialNB().fit(trainingTfidf, trainingTarget)

                # the current article goes through the already fitted transformers
                testData = [self.articleContent]
                testDataCounts = countVect.transform(testData)
                testTfidf = tfidfTransformer.transform(testDataCounts)

                predicted = clf.predict(testTfidf)