def makeMonthlyAppearanceChart(self, entities, fromDate, toDate):
    """Render a grouped bar chart of monthly appearance counts per entity.

    One bar trace is built per entity from the result of
    ``selectCountAssociationsForEntityBetweenDates``. The figure is exported
    to ``appearances.png`` with ``py.image.save_as`` and the saved file is
    then opened and shown.

    Args:
        entities: iterable of ``(entityId, entity)`` pairs; ``entity`` is
            used as the trace name.
        fromDate: start of the charted period, forwarded to the query helper.
        toDate: end of the charted period, forwarded to the query helper.
    """
    traces = []
    for (entityId, entity) in entities:
        monthlyApp = self.selectCountAssociationsForEntityBetweenDates(
            entityId, fromDate, toDate)
        traces.append(Bar(
            x=[self.monthYearLabel(month) for (month, _) in monthlyApp],
            y=[count for (_, count) in monthlyApp],
            name=entity,
            marker=Marker(
                color='rgb(204,204,204)',
                opacity=0.5,
            ),
        ))
    chartData = Data(traces)
    layout = Layout(
        xaxis=XAxis(
            # draw the x-axis labels at a 45 degree angle
            tickangle=-45,
        ),
        barmode='group',
    )
    fig = Figure(data=chartData, layout=layout)
    # Export the complete figure. Saving only {'data': chartData} would
    # discard the layout (tick angle and grouped bar mode) built above.
    py.image.save_as(fig, "appearances.png")
    img = Image.open("appearances.png")
    img.show()
def makeMonthlyPolarityChart(self, entities, fromDate, toDate):
    """Render a grouped bar chart of the monthly share of polarity ``1``.

    For every entity, ``selectAllPolaritiesForEntity`` is expected to yield
    ``(month, l, rows)`` triples. Each bar is
    ``rows.count(1) / (l + 1) * 100``. The figure is exported to
    ``polarities.png`` with ``py.image.save_as`` and the saved file is then
    opened and shown.

    Args:
        entities: iterable of ``(entityId, entity)`` pairs; ``entity`` is
            used as the trace name.
        fromDate: start of the charted period, forwarded to the query helper.
        toDate: end of the charted period, forwarded to the query helper.
    """
    traces = []
    for (entityId, entity) in entities:
        monthlyPol = self.selectAllPolaritiesForEntity(
            entityId, fromDate, toDate)
        traces.append(Bar(
            x=[self.monthYearLabel(month) for (month, _, _) in monthlyPol],
            # NOTE(review): the denominator is l + 1, not l -- presumably a
            # guard against division by zero; confirm that the resulting
            # skew in the percentage is intended.
            # 0.0 forces float division; count(1) also matches 1L values.
            y=[(0.0 + rows.count(1)) / (n + 1) * 100
               for (_, n, rows) in monthlyPol],
            name=entity,
            marker=Marker(
                color='rgb(204,204,204)',
                opacity=0.5,
            ),
        ))
    chartData = Data(traces)
    layout = Layout(
        xaxis=XAxis(
            # draw the x-axis labels at a 45 degree angle
            tickangle=-45,
        ),
        barmode='group',
    )
    fig = Figure(data=chartData, layout=layout)
    # Export the complete figure. Saving only {'data': chartData} would
    # discard the layout (tick angle and grouped bar mode) built above.
    py.image.save_as(fig, "polarities.png")
    img = Image.open("polarities.png")
    img.show()
def doAssociationsForEntity(self, entity):
    """Create the missing entity/article associations for ``entity``.

    Articles whose content contains ``entity`` but that have no row in
    ``assocEntityArticle`` for this entity get one inserted. The
    ``articleInfoUpdate`` signal is emitted afterwards.

    Args:
        entity: entity text; it is resolved to an id via ``selectEntityId``
            and matched against article content with ``LIKE %entity%``.
    """
    cursor = mysql_conn.cursor()
    entityId = self.selectEntityId(entity)

    # article ids that already have an association with this entity
    cursor.execute(
        """SELECT article_id FROM assocEntityArticle WHERE entity_id=%s""",
        (entityId,))
    alreadyLinked = [row[0] for row in cursor.fetchall()]

    # article ids whose content mentions the entity
    cursor.execute(
        """SELECT article_id FROM articles WHERE content LIKE %s""",
        ("%" + entity + "%",))
    mentioning = [row[0] for row in cursor.fetchall()]

    # articles that mention the entity but are not associated yet
    missing = list(set(mentioning) - set(alreadyLinked))
    if len(missing) != 0:
        cursor.executemany(
            """INSERT INTO assocEntityArticle (article_id, entity_id) VALUES (%s, %s)""",
            [(articleId, entityId) for articleId in missing])
        cursor.execute("""COMMIT""")
    self.articleInfoUpdate.emit()
def classifyEntitiesInArticle(self):
    """Predict a polarity for each entity that has no calculated label yet.

    For every ``(entityId, entity)`` in ``self.entities`` whose matching
    entry in ``self.calculatedClassLabels`` has empty text, a
    CountVectorizer -> TfidfTransformer -> MultinomialNB model is trained on
    the articles that carry a manual polarity for that entity, and then
    applied to the current article content.

    Entities without any manual polarity are skipped, because there is
    nothing to train on.
    """
    # Snapshot the text to classify before any self.selectArticle() call:
    # a selectArticle implementation that stores the fetched row on self
    # would otherwise replace self.articleContent with a training article.
    articleContent = self.articleContent

    for i, entityRow in enumerate(self.entities):
        if len(self.calculatedClassLabels[i].text().strip()) != 0:
            continue  # this entity already has a calculated label
        entityId, _ = entityRow

        manualPol = self.selectManualPolaritiesForEntity(entityId)
        if not manualPol:
            # Fitting the vectorizer on an empty corpus raises, so skip.
            continue
        # column 4 of the article row is used as the training text
        trainingData = [self.selectArticle(id_)[4] for (id_, _) in manualPol]
        trainingTarget = [polarity for (_, polarity) in manualPol]

        countVect = CountVectorizer()
        trainingDataCounts = countVect.fit_transform(trainingData)
        tfidfTransformer = TfidfTransformer()
        trainingTfidf = tfidfTransformer.fit_transform(trainingDataCounts)
        clf = MultinomialNB().fit(trainingTfidf, trainingTarget)

        testDataCounts = countVect.transform([articleContent])
        testTfidf = tfidfTransformer.transform(testDataCounts)
        # NOTE(review): the prediction is not used by any code visible in
        # this definition -- confirm whether it should be stored/displayed.
        predicted = clf.predict(testTfidf)
def doAllAssociations(self):
    """Create the missing article associations for every known entity.

    ``selectAllEntities`` yields ``(entity_id, entity)`` rows, while
    ``doAssociationsForEntity`` expects the entity text, so only the text
    column is forwarded. ``articleInfoUpdate`` is emitted once at the end.
    """
    for (_, entity) in self.selectAllEntities():
        # Pass the entity text, not the whole row: the callee concatenates
        # its argument with "%" to build a LIKE pattern.
        self.doAssociationsForEntity(entity)
    self.articleInfoUpdate.emit()
def selectAllEntities(self):
    """Return every row of the ``entities`` table as fetched by the cursor."""
    cursor = mysql_conn.cursor()
    cursor.execute("""SELECT * FROM entities""")
    return cursor.fetchall()
def selectCountArticles(self):
    """Return the number of rows in the ``articles`` table."""
    cursor = mysql_conn.cursor()
    cursor.execute("""SELECT count(*) FROM articles""")
    countRow = cursor.fetchone()
    return countRow[0]
def selectCountAssociations(self):
    """Return the number of rows in the ``assocEntityArticle`` table."""
    cursor = mysql_conn.cursor()
    cursor.execute("""SELECT count(*) FROM assocEntityArticle""")
    countRow = cursor.fetchone()
    return countRow[0]
def deleteAllAuthors(self):
    """Delete all authors with ``author_id > 0`` and reset AUTO_INCREMENT.

    The statements run in order on one cursor and end with a ``COMMIT``.
    """
    cursor = mysql_conn.cursor()
    statements = (
        """DELETE FROM authors WHERE author_id > 0""",
        """ALTER TABLE authors AUTO_INCREMENT = 1""",
        """COMMIT""",
    )
    for statement in statements:
        cursor.execute(statement)
def selectCountArticles(self):
    """Return the number of rows in the ``articles`` table."""
    cursor = mysql_conn.cursor()
    cursor.execute("""SELECT count(*) FROM articles""")
    countRow = cursor.fetchone()
    return countRow[0]
def selectCountEntities(self):
    """Return the number of rows in the ``entities`` table."""
    cursor = mysql_conn.cursor()
    cursor.execute("""SELECT count(*) FROM entities""")
    countRow = cursor.fetchone()
    return countRow[0]
def deleteAllAssociations(self):
    """Delete all associations with ``article_id > 0`` and commit.

    Emits ``articleInfoUpdate`` once the commit has been issued.
    """
    cursor = mysql_conn.cursor()
    for statement in ("""DELETE FROM assocEntityArticle WHERE article_id > 0""",
                      """COMMIT"""):
        cursor.execute(statement)
    self.articleInfoUpdate.emit()
def selectMinAndMaxDate(self):
    """Return the single ``(MIN(date), MAX(date))`` row over ``articles``."""
    cursor = mysql_conn.cursor()
    cursor.execute("""SELECT MIN(date), MAX(date) FROM articles""")
    return cursor.fetchone()
def selectPolarityForEntityInArticle(self, articleId, entityId):
    """Fetch the polarities stored for one article/entity association.

    Args:
        articleId: value matched against ``article_id``.
        entityId: value matched against ``entity_id``.

    Returns:
        The first ``(polarity_manual, polarity_calculated)`` row, or whatever
        ``fetchone()`` yields when nothing matches.
    """
    cursor = mysql_conn.cursor()
    query = """SELECT polarity_manual, polarity_calculated FROM assocEntityArticle WHERE article_id=%s AND entity_id=%s"""
    cursor.execute(query, (articleId, entityId))
    polarityRow = cursor.fetchone()
    return polarityRow
def selectPolarityForEntityInArticle(self, articleId, entityId):
    """Fetch the polarities stored for one article/entity association.

    Args:
        articleId: value matched against ``article_id``.
        entityId: value matched against ``entity_id``.

    Returns:
        The first ``(polarity_manual, polarity_calculated)`` row, or whatever
        ``fetchone()`` yields when nothing matches.
    """
    cursor = mysql_conn.cursor()
    query = """SELECT polarity_manual, polarity_calculated FROM assocEntityArticle WHERE article_id=%s AND entity_id=%s"""
    cursor.execute(query, (articleId, entityId))
    polarityRow = cursor.fetchone()
    return polarityRow
def selectCountClassifiedAssociations(self):
    """Count associations that have a calculated or a manual polarity."""
    cursor = mysql_conn.cursor()
    cursor.execute(
        """SELECT count(*) FROM assocEntityArticle WHERE polarity_calculated IS NOT NULL OR polarity_manual IS NOT NULL""")
    countRow = cursor.fetchone()
    return countRow[0]
def selectAllArticlesByEntity(self, entity):
    """Return all article rows whose content contains ``entity``.

    The match is a SQL ``LIKE`` with a ``%`` wildcard on both sides of the
    entity text.
    """
    cursor = mysql_conn.cursor()
    pattern = "%" + entity + "%"
    cursor.execute("""SELECT * FROM articles WHERE content LIKE %s""",
                   (pattern,))
    return cursor.fetchall()
def selectEntityId(self, entity):
    """Return the ``entity_id`` of the row whose ``entity`` equals the argument.

    The first fetched row is indexed directly, so a missing entity is not
    handled here.
    """
    cursor = mysql_conn.cursor()
    cursor.execute("""SELECT entity_id FROM entities WHERE entity=%s""",
                   (entity,))
    return cursor.fetchone()[0]
def process_item(self, item, spider):
    """Resolve the item's author id, store the article and return the item.

    ``selectAuthor`` is queried with ``item['auth']``; when it reports ``-1``
    the author is created with ``insertAuthor`` and that result is used
    instead. The id is stored as an int in ``item['idAuth']`` before
    ``insertArticle`` is called.

    Args:
        item: mapping with at least an ``'auth'`` key; mutated in place.
        spider: not used by this method.
    """
    cursor = mysql_conn.cursor()
    authorId = selectAuthor(cursor, item['auth'])
    if authorId == -1:
        # unknown author: create it and use the value insertAuthor returns
        authorId = insertAuthor(cursor, item['auth'])
    item['idAuth'] = int(authorId)
    insertArticle(cursor, item)
    return item
def selectAuthor(self, id):
    """Return the second column of the ``authors`` row with ``author_id == id``.

    The fetched row is indexed directly, so a missing author is not handled
    here.
    """
    cursor = mysql_conn.cursor()
    cursor.execute("""SELECT * FROM authors WHERE author_id=%s""", (id,))
    authorRow = cursor.fetchone()
    return authorRow[1]
def selectAuthor(self, id):
    """Return the second column of the ``authors`` row with ``author_id == id``.

    The fetched row is indexed directly, so a missing author is not handled
    here.
    """
    cursor = mysql_conn.cursor()
    cursor.execute("""SELECT * FROM authors WHERE author_id=%s""", (id,))
    authorRow = cursor.fetchone()
    return authorRow[1]
def selectArticle(self, articleId):
    """Return the ``articles`` row with the given ``article_id``.

    The result is whatever ``fetchone()`` yields for the query.
    """
    cursor = mysql_conn.cursor()
    cursor.execute("""SELECT * FROM articles WHERE article_id=%s""",
                   (articleId,))
    return cursor.fetchone()
def clearAllCalculatedPolarities(self):
    """Reset every non-NULL ``polarity_calculated`` value and commit.

    ``None`` is bound as the new value. ``articleInfoUpdate`` is emitted
    after the commit.
    """
    cursor = mysql_conn.cursor()
    cursor.execute(
        """UPDATE assocEntityArticle SET polarity_calculated=%s WHERE polarity_calculated IS NOT NULL""",
        (None,))
    cursor.execute("""COMMIT""")
    self.articleInfoUpdate.emit()
def process_item(self, item, spider):
    """Resolve the item's author id, store the article and return the item.

    ``selectAuthor`` is queried with ``item['auth']``; when it reports ``-1``
    the author is created with ``insertAuthor`` and that result is used
    instead. The id is stored as an int in ``item['idAuth']`` before
    ``insertArticle`` is called.

    Args:
        item: mapping with at least an ``'auth'`` key; mutated in place.
        spider: not used by this method.
    """
    cursor = mysql_conn.cursor()
    authorId = selectAuthor(cursor, item['auth'])
    if authorId == -1:
        # unknown author: create it and use the value insertAuthor returns
        authorId = insertAuthor(cursor, item['auth'])
    item['idAuth'] = int(authorId)
    insertArticle(cursor, item)
    return item
def selectManualPolaritiesForEntity(self, entityId):
    """Return ``(article_id, polarity_manual)`` rows for one entity.

    Only associations whose ``polarity_manual`` is not NULL are selected.
    """
    cursor = mysql_conn.cursor()
    query = """SELECT article_id, polarity_manual FROM assocEntityArticle WHERE polarity_manual IS NOT NULL AND entity_id=%s"""
    cursor.execute(query, (entityId,))
    return cursor.fetchall()
def selectManualPolaritiesForEntity(self, entityId):
    """Return ``(article_id, polarity_manual)`` rows for one entity.

    Only associations whose ``polarity_manual`` is not NULL are selected.
    """
    cursor = mysql_conn.cursor()
    query = """SELECT article_id, polarity_manual FROM assocEntityArticle WHERE polarity_manual IS NOT NULL AND entity_id=%s"""
    cursor.execute(query, (entityId,))
    return cursor.fetchall()
def deleteAllEntities(self):
    """Delete all entities with ``entity_id > 0`` and reset AUTO_INCREMENT.

    After the final ``COMMIT`` both ``articleInfoUpdate`` and
    ``entityUpdate`` are emitted, in that order.
    """
    cursor = mysql_conn.cursor()
    statements = (
        """DELETE FROM entities WHERE entity_id > 0""",
        """ALTER TABLE entities AUTO_INCREMENT = 1""",
        """COMMIT""",
    )
    for statement in statements:
        cursor.execute(statement)
    for signal in (self.articleInfoUpdate, self.entityUpdate):
        signal.emit()
def clearClassification(self):
    """Clear the calculated polarity of every entity of the current article.

    For each entry of ``self.entities`` the association row identified by
    ``self.articleId`` and the entry's first element gets ``None`` bound as
    ``polarity_calculated``. The changes are committed, the parent's
    ``articleInfoUpdate`` signal is emitted and the entity box is refreshed.
    """
    cursor = mysql_conn.cursor()
    updateStmt = """UPDATE assocEntityArticle SET polarity_calculated=%s WHERE article_id=%s AND entity_id=%s"""
    for entityRow in self.entities:
        cursor.execute(updateStmt, (None, self.articleId, entityRow[0]))
    cursor.execute("""COMMIT""")
    self.parentW.articleInfoUpdate.emit()
    self.updateEntityBox()
def clearClassification(self):
    """Clear the calculated polarity of every entity of the current article.

    For each entry of ``self.entities`` the association row identified by
    ``self.articleId`` and the entry's first element gets ``None`` bound as
    ``polarity_calculated``. The changes are committed, the parent's
    ``articleInfoUpdate`` signal is emitted and the entity box is refreshed.
    """
    cursor = mysql_conn.cursor()
    updateStmt = """UPDATE assocEntityArticle SET polarity_calculated=%s WHERE article_id=%s AND entity_id=%s"""
    for entityRow in self.entities:
        cursor.execute(updateStmt, (None, self.articleId, entityRow[0]))
    cursor.execute("""COMMIT""")
    self.parentW.articleInfoUpdate.emit()
    self.updateEntityBox()
def deleteAllArticles(self):
    """Delete all articles with ``article_id > 0`` and reset AUTO_INCREMENT.

    On success the change is committed and ``articleInfoUpdate`` is emitted.
    An ``IntegrityError`` raised by the statements is still not propagated
    to the caller, but it is now logged instead of being silently dropped;
    in that case no signal is emitted.
    """
    import logging

    cursor = mysql_conn.cursor()
    try:
        cursor.execute("""DELETE FROM articles WHERE article_id > 0""")
        cursor.execute("""ALTER TABLE articles AUTO_INCREMENT = 1""")
        cursor.execute("""COMMIT""")
    except IntegrityError:
        # Best effort: keep the caller running, but leave a trace of why
        # the articles were not removed.
        logging.getLogger(__name__).warning(
            "Could not delete all articles", exc_info=True)
        return
    self.articleInfoUpdate.emit()
def selectArticle(self, articleId):
    """Fetch an article row and store its fields on the instance.

    Columns 1, 2 and 4 of the row are stored as ``articleTitle``,
    ``articleDate`` and ``articleContent``; column 3 is resolved through
    ``selectAuthor`` and stored as ``articleAuthor``.

    Returns:
        The fetched row.
    """
    cursor = mysql_conn.cursor()
    cursor.execute("""SELECT * FROM articles WHERE article_id=%s""",
                   (articleId,))
    article = cursor.fetchone()
    self.articleTitle = article[1]
    self.articleDate = article[2]
    authorName = self.selectAuthor(article[3])
    self.articleAuthor = authorName
    self.articleContent = article[4]
    return article
def selectArticle(self, articleId):
    """Fetch an article row and store its fields on the instance.

    Columns 1, 2 and 4 of the row are stored as ``articleTitle``,
    ``articleDate`` and ``articleContent``; column 3 is resolved through
    ``selectAuthor`` and stored as ``articleAuthor``.

    Returns:
        The fetched row.
    """
    cursor = mysql_conn.cursor()
    cursor.execute("""SELECT * FROM articles WHERE article_id=%s""",
                   (articleId,))
    article = cursor.fetchone()
    self.articleTitle = article[1]
    self.articleDate = article[2]
    authorName = self.selectAuthor(article[3])
    self.articleAuthor = authorName
    self.articleContent = article[4]
    return article
def deleteAssciationsForEntity(self, entity):
    """Delete every association of the entity with the given text.

    The entity id is looked up by text first; the first fetched row is
    indexed directly, so a missing entity is not handled here. After the
    delete is committed, ``articleInfoUpdate`` is emitted.
    """
    cursor = mysql_conn.cursor()
    cursor.execute("""SELECT entity_id FROM entities WHERE entity=%s""",
                   (entity,))
    entityId = cursor.fetchone()[0]
    cursor.execute("""DELETE FROM assocEntityArticle WHERE entity_id=%s""",
                   (entityId,))
    cursor.execute("""COMMIT""")
    self.articleInfoUpdate.emit()
def classifyAllAssociations(self):
    """Classify every still-unclassified association of every entity.

    For each entity a ``CountVectorizer -> TfidfTransformer -> classifier``
    pipeline is trained on the articles that carry a manual polarity for
    that entity. The classifier is the entry of ``classifiers`` selected in
    ``self.algorithmComboBox``. Associations with neither a manual nor a
    calculated polarity are then predicted and written back to
    ``polarity_calculated``.

    Entities without manual polarities are skipped, because there is nothing
    to train on. ``articleInfoUpdate`` is emitted once all entities have
    been processed.
    """
    cursor = mysql_conn.cursor()
    # The selected algorithm does not change while we iterate.
    algorithm = self.algorithmComboBox.currentText()
    selectStmt = """SELECT article_id FROM assocEntityArticle WHERE polarity_manual IS NULL AND polarity_calculated IS NULL AND entity_id=%s"""
    updateStmt = """UPDATE assocEntityArticle SET polarity_calculated=%s WHERE entity_id=%s AND article_id=%s"""

    for (entityId, _) in self.selectAllEntities():
        manualPol = self.selectManualPolaritiesForEntity(entityId)
        if not manualPol:
            # Fitting the pipeline on an empty corpus raises, so skip.
            continue
        # column 4 of the article row is used as the text
        trainingData = [self.selectArticle(id_)[4] for (id_, _) in manualPol]
        trainingTarget = [polarity for (_, polarity) in manualPol]

        textClf = Pipeline([('vect', CountVectorizer()),
                            ('tfidf', TfidfTransformer()),
                            ('clf', classifiers[algorithm]),
                            ])
        textClf.fit(trainingData, trainingTarget)

        # articles associated with the entity that still need a polarity
        cursor.execute(selectStmt, (entityId,))
        ids = [row[0] for row in cursor.fetchall()]
        if not ids:
            continue

        testData = [self.selectArticle(id_)[4] for id_ in ids]
        predicted = textClf.predict(testData)
        # diagnostic output kept from the original: count of predictions == 1
        print(list(predicted).count(1))

        updateData = [(polarity, entityId, id_)
                      for (polarity, id_) in zip(predicted, ids)]
        cursor.executemany(updateStmt, updateData)
        cursor.execute("""COMMIT""")
    self.articleInfoUpdate.emit()
def selectCountAssociationsForEntityBetweenDates(self, entityId, fromDate, toDate):
    """Count an entity's article associations for each month of a period.

    The months come from ``monthsBetweenDates(fromDate, toDate)``. Each
    month is counted over the half-open range ``[month, month + 1 month)``,
    so an article dated exactly on a month boundary is counted in one month
    only. The inclusive ``BETWEEN`` used before counted it in both.

    Args:
        entityId: value matched against ``entity_id``.
        fromDate: start of the period, forwarded to ``monthsBetweenDates``.
        toDate: end of the period, forwarded to ``monthsBetweenDates``.

    Returns:
        A list of ``(month, count)`` tuples, one per month, in the order the
        months were produced; empty when there are no months.
    """
    cursor = mysql_conn.cursor()
    selectStmt = """SELECT count(*) FROM assocEntityArticle a, articles b WHERE a.article_id = b.article_id AND b.date >= %s AND b.date < %s AND a.entity_id=%s"""
    associations = []
    for month in self.monthsBetweenDates(fromDate, toDate):
        fromDateString = self.getStringDate(month)
        # exclusive upper bound: the start of the following month
        toDateString = self.getStringDate(month.addMonths(1))
        cursor.execute(selectStmt, (fromDateString, toDateString, entityId))
        associations.append((month, cursor.fetchone()[0]))
    return associations
def removeEntity(self):
    """Delete the entities currently selected in ``self.entityList``.

    For every selected item its associations are removed through
    ``deleteAssciationsForEntity``, the entity id is looked up by the item's
    text, and the entity row is deleted. The deletions are committed
    afterwards and ``entityUpdate`` is emitted.
    """
    chosenItems = self.entityList.selectedItems()
    cursor = mysql_conn.cursor()
    lookupStmt = """SELECT entity_id FROM entities WHERE entity=%s"""
    removeStmt = """DELETE FROM entities WHERE entity_id=%s"""
    for item in chosenItems:
        self.deleteAssciationsForEntity(item.text())
        cursor.execute(lookupStmt, (item.text(),))
        idRow = cursor.fetchone()
        cursor.execute(removeStmt, (idRow[0],))
    cursor.execute("""COMMIT""")
    self.entityUpdate.emit()
def selectEntitiesInArticle(self):
    """Return ``(entity_id, entity)`` pairs associated with ``self.articleId``.

    The associated ids are read from ``assocEntityArticle`` first; each id is
    then resolved to its entity text with one query per id. The result is a
    list in the order the ids were fetched.
    """
    cursor = mysql_conn.cursor()
    cursor.execute(
        """SELECT entity_id FROM assocEntityArticle WHERE article_id=%s""",
        (self.articleId,))
    entityIds = [row[0] for row in cursor.fetchall()]

    nameStmt = """SELECT entity FROM entities WHERE entity_id=%s"""
    found = []
    for entityId in entityIds:
        cursor.execute(nameStmt, (entityId,))
        found.append((entityId, cursor.fetchone()[0]))
    return found
def selectEntitiesInArticle(self):
    """Return ``(entity_id, entity)`` pairs associated with ``self.articleId``.

    The associated ids are read from ``assocEntityArticle`` first; each id is
    then resolved to its entity text with one query per id. The result is a
    list in the order the ids were fetched.
    """
    cursor = mysql_conn.cursor()
    cursor.execute(
        """SELECT entity_id FROM assocEntityArticle WHERE article_id=%s""",
        (self.articleId,))
    entityIds = [row[0] for row in cursor.fetchall()]

    nameStmt = """SELECT entity FROM entities WHERE entity_id=%s"""
    found = []
    for entityId in entityIds:
        cursor.execute(nameStmt, (entityId,))
        found.append((entityId, cursor.fetchone()[0]))
    return found
def classifyEntitiesInArticle(self):
    """Predict a polarity for each entity that has no calculated label yet.

    For every ``(entityId, entity)`` in ``self.entities`` whose matching
    entry in ``self.calculatedClassLabels`` has empty text, a
    CountVectorizer -> TfidfTransformer -> MultinomialNB model is trained on
    the articles that carry a manual polarity for that entity, and then
    applied to the current article content.

    Entities without any manual polarity are skipped, because there is
    nothing to train on.
    """
    # Snapshot the text to classify before any self.selectArticle() call:
    # a selectArticle implementation that stores the fetched row on self
    # would otherwise replace self.articleContent with a training article.
    articleContent = self.articleContent

    for i, entityRow in enumerate(self.entities):
        if len(self.calculatedClassLabels[i].text().strip()) != 0:
            continue  # this entity already has a calculated label
        entityId, _ = entityRow

        manualPol = self.selectManualPolaritiesForEntity(entityId)
        if not manualPol:
            # Fitting the vectorizer on an empty corpus raises, so skip.
            continue
        # column 4 of the article row is used as the training text
        trainingData = [self.selectArticle(id_)[4] for (id_, _) in manualPol]
        trainingTarget = [polarity for (_, polarity) in manualPol]

        countVect = CountVectorizer()
        trainingDataCounts = countVect.fit_transform(trainingData)
        tfidfTransformer = TfidfTransformer()
        trainingTfidf = tfidfTransformer.fit_transform(trainingDataCounts)
        clf = MultinomialNB().fit(trainingTfidf, trainingTarget)

        testDataCounts = countVect.transform([articleContent])
        testTfidf = tfidfTransformer.transform(testDataCounts)
        # NOTE(review): the prediction is not used by any code visible in
        # this definition -- confirm whether it should be stored/displayed.
        predicted = clf.predict(testTfidf)