def createTFIDFTopics(self):
    """Group headline titles by day and country and feed each group to TfIdf.

    Connects to PostgreSQL using ``self.dbname``, ``self.dbuser``,
    ``self.dbpass`` and ``self.dbhost``, reads every row of
    ``articles_headlines`` and collects the titles under the key
    ``"<article_day>-<country>"``.  For every group a header line is
    printed and a new ``TfIdf`` instance, configured with the stopwords
    read from ``stopwords.txt`` (one per line, whitespace-stripped), parses
    the list of titles.

    The connection is stored on ``self.db`` and is always closed before
    this method returns or raises.  Nothing is returned; the value returned
    by ``TfIdf.parse`` is not used here.
    """
    # Keyword arguments instead of a hand-built "key=value" DSN string: the
    # driver then takes care of quoting, so empty values or values that
    # contain spaces or quotes cannot corrupt the connection string.
    self.db = psycopg2.connect(
        dbname=self.dbname,
        user=self.dbuser,
        password=self.dbpass,
        host=self.dbhost,
    )

    headlines = {}
    try:
        c = self.db.cursor()
        c.execute(
            "SELECT article_day,country,title,url,article_hash FROM articles_headlines")
        for row in c.fetchall():
            # NOTE: only the title is analysed; an earlier revision also
            # appended the article content looked up via article_hash.
            key = str(row[0]) + '-' + row[1]
            headlines.setdefault(key, []).append(row[2])
    finally:
        # Release the connection even when the query fails.
        self.db.close()

    if not headlines:
        # Nothing to analyse, so the stopword file is not needed either.
        return

    # The stopword list is the same for every group: read it once.
    with open('stopwords.txt', 'r') as st:
        stopwords = [x.strip() for x in st]

    for hd, contents in headlines.items():
        print(f'>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> {hd}')
        # Each analyser gets its own copy of the list, as before.
        tfidf = TfIdf(stopwords=list(stopwords))
        tfidf.parse(contents)
def createTFIDFTopics(self):
    """Group headline titles by day and country and feed each group to TfIdf.

    Opens the SQLite database at ``self.dbname`` (with
    ``sqlite3.PARSE_DECLTYPES`` type detection), reads every row of
    ``articles_headlines`` and collects the titles under the key
    ``"<article_day>-<country>"``.  For every group a header line is
    printed and a new ``TfIdf`` instance, configured with the stopwords
    read from ``stopwords.txt`` (one per line, whitespace-stripped), parses
    the list of titles.

    The connection is stored on ``self.db`` and is always closed before
    this method returns or raises.  Nothing is returned; the value returned
    by ``TfIdf.parse`` is not used here.
    """
    self.db = sqlite3.connect(self.dbname,
                              detect_types=sqlite3.PARSE_DECLTYPES)

    headlines = {}
    try:
        c = self.db.cursor()
        c.execute(
            "SELECT article_day,country,title,url,article_hash FROM articles_headlines"
        )
        for row in c.fetchall():
            # NOTE: only the title is analysed; an earlier revision also
            # appended the article content looked up via article_hash.
            key = str(row[0]) + '-' + row[1]
            headlines.setdefault(key, []).append(row[2])
    finally:
        # Release the connection even when the query fails.
        self.db.close()

    if not headlines:
        # Nothing to analyse, so the stopword file is not needed either.
        return

    # The stopword list is the same for every group: read it once.
    with open('stopwords.txt', 'r') as st:
        stopwords = [x.strip() for x in st]

    # items() and the parenthesised print work on Python 2 and Python 3;
    # iteritems() and the print statement only exist on Python 2.
    for hd, contents in headlines.items():
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ' + hd)
        # Each analyser gets its own copy of the list, as before.
        tfidf = TfIdf(stopwords=list(stopwords))
        tfidf.parse(contents)