def plot(sql, title, headers):
    """Execute *sql* and persist its result set as an HTML table.

    The table is rendered with *headers* as column captions and saved
    under a filename derived from *title*.
    """
    connection.run_sql(sql)
    rows = connection.results()
    table_html = _build_table(rows, title, headers)
    _save_table(table_html, rows, title)
def plot_transpose(sql, title, headers):
    """Execute *sql* and save the first result row transposed as a table.

    The single row is turned into (column name, stringified value) pairs
    before being rendered and saved.
    """
    connection.run_sql(sql)
    rows = connection.results()
    column_names = connection.columns()
    # Pair each cell of the first row with its column name.
    pairs = [(column_names[i], str(cell)) for i, cell in enumerate(rows[0])]
    table_html = _build_table(pairs, title, headers)
    _save_table(table_html, pairs, title)
def save(self):
    """Insert this post's attributes into the ``post`` table.

    Missing keys in ``self.data`` are stored as NULL (``dict.get``
    returns ``None`` for absent keys).
    """
    # XML attribute names, in the same order as the INSERT column list.
    fields = (
        'Id', 'Title', 'Body', 'Tags', 'ParentId', 'AnswerCount',
        'AcceptedAnswerId', 'CreationDate', 'LastActivityDate', 'Score',
        'LastEditDate', 'LastEditorUserId', 'LastEditorDisplayName',
        'PostTypeId', 'CommentCount', 'ViewCount', 'FavoriteCount',
        'OwnerUserId', 'OwnerDisplayName', 'ClosedDate',
    )
    sql = """
        insert into post (
            id, title, body, tags, parent_id, answer_count,
            accepted_answer_id, creation_date, last_activity_date, score,
            last_edit_date, last_editor_user_id, last_editor_display_name,
            post_type_id, comment_count, view_count, favorite_count,
            owner_user_id, owner_display_name, closed_date
        ) values (
            %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
            %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
        );
    """
    values = [self.data.get(field) for field in fields]
    connection.run_sql(sql, values)
def insert_link(self, link):
    """Record *link* as cited by this post, skipping duplicates.

    Looks up the (post, link) pair first and returns early when it is
    already stored; otherwise inserts the link together with its netloc.
    """
    pair = [self.post_id, link]
    connection.run_sql(
        'SELECT count(*) FROM cited_links WHERE post = %s AND link = %s',
        pair)
    already_cited = connection.results()[0][0] > 0
    if already_cited:
        return
    location = urlparse(link).netloc
    print(location)  # progress trace while crawling
    connection.run_sql(
        'INSERT INTO cited_links (post, link, location) VALUES (%s, %s, %s)',
        [self.post_id, link, location])
def plot(sql, title, legend=False):
    """Execute *sql* and render its results as a stacked bar chart PNG.

    *legend*, when given, is the sequence of series labels to show.
    The chart is saved to a filename derived from *title*.
    """
    connection.run_sql(sql)
    rows = connection.results()
    x, y, total = _separate(rows)

    positions = range(len(x))  # the x locations for the groups
    bar_width = 0.50           # the width of the bars

    fig, ax = plt.subplots()
    bar_groups = []
    # Stack each series on top of the running totals of the previous ones.
    stack_base = [0] * len(y[0])
    for series_index, series in enumerate(y):
        bar_groups.append(
            ax.bar(positions, series, bar_width,
                   color=colors[series_index], bottom=stack_base))
        stack_base = [base + value for base, value in zip(stack_base, series)]

    # Labels, title and axis ticks.
    ax.set_title(title)
    ax.set_xticks([pos + bar_width / 2 for pos in positions])
    ax.set_xticklabels(list(x))

    # Deliberately compares against False (not truthiness): an explicit
    # legend sequence is drawn even if it happens to be empty.
    if legend != False:
        plt.legend(bar_groups, legend)

    if len(bar_groups) == 1:
        label_top(bar_groups[0], ax)
    else:
        label_middle(bar_groups, ax)

    # Golden-ratio headroom above the tallest stacked bar.
    plt.axis([0 - bar_width, len(x), 0, float(max(total)) * 1.618])
    plt.savefig(utils.filename_from_title(title, 'png'))
    plt.clf()
import connection
from html.parser import HTMLParser
from urllib.parse import urlparse

connection.run_sql('select body, id from post')
posts = connection.results()


class LinkFinder(HTMLParser):
    """HTML parser that stores every anchor href cited by a post."""

    def set_post(self, post_id):
        # The id of the post whose body is currently being parsed.
        self.post_id = post_id

    def handle_starttag(self, tag, attrs):
        # Only anchor tags carry the citations we care about.
        if (tag == 'a'):
            for attr in attrs:
                if (attr[0] == 'href'):
                    self.insert_link(attr[1])

    def insert_link(self, link):
        """Persist *link* for the current post unless already recorded."""
        connection.run_sql(
            'SELECT count(*) FROM cited_links WHERE post = %s AND link = %s',
            [self.post_id, link])
        previous_results = connection.results()
        if previous_results[0][0] > 0:
            return
        location = urlparse(link).netloc
        print(location)
        # BUG FIX: this variant checked for duplicates but never stored the
        # link, so the crawl recorded nothing.  Insert it, matching the
        # complete insert_link implementation elsewhere in the project.
        connection.run_sql(
            'INSERT INTO cited_links (post, link, location) VALUES (%s, %s, %s)',
            [self.post_id, link, location])
import connection
from post import Post
from tcc_themes import vocabulary
from tcc_themes import theme
from tcc_themes import stemmer

connection.run_sql(
    'select stemmed_body, id, title from post where question_type <> 4')
posts = connection.results()

# One document per post; the stemmed title is appended twice so title
# terms weigh more than body terms.
documents = []
for row in posts:
    stemmed_title = stemmer.process_text(row[2])
    documents.append(' '.join((row[0], stemmed_title, stemmed_title)))

words = vocabulary.build(documents, N=2500)
documents_themes, topics = theme.assign(documents, words, topics=3)

# Rebuild the topic table from scratch with the freshly derived topics.
connection.run_sql('delete from topic')
for topic_number, topic_label in enumerate(topics, start=1):
    sql = "INSERT INTO topic VALUES (%s, %s)"
    connection.run_sql(sql, [topic_number, topic_label])

# Store each document's topic weights as a Postgres array literal.
for doc_index, doc_themes in enumerate(documents_themes):
    weights = ', '.join(map(str, doc_themes))
    connection.run_sql("update post set topics = '{%s}' where id = %d"
                       % (weights, posts[doc_index][1]))
import connection
from tcc_themes import stemmer

connection.run_sql('select body, id from post')
posts = connection.results()

# Stem every post body and write the result back, reporting progress
# every 100 posts.
for count, row in enumerate(posts):
    stemmed = stemmer.process_text(row[0])
    connection.run_sql('update post set stemmed_body = %s where id = %s',
                       (stemmed, row[1]))
    if count % 100 == 0 and count > 0:
        print("%d posts processados" % (count))

print("%d posts processados" % (len(posts)))
def get_question_ids():
    """Return the result rows holding the id of every question post.

    Questions are rows whose post_type_id is 1.
    """
    sql = """
        select id
        from post
        where post_type_id = 1;
    """
    connection.run_sql(sql)
    return connection.results()
# BUG FIX: `type=bool` is broken with argparse -- bool() of ANY non-empty
# string (including "False") is True, so "-a False" silently enabled
# ascending order.  A store_true flag gives the intended on/off semantics
# with the same default and dest.
parser.add_argument('-a', '--ascending', dest='is_ascending',
                    action='store_true', default=False,
                    help='inverts the order')
args = parser.parse_args()

direction = 'ASC' if args.is_ascending else 'DESC'

# Fetch the posts most strongly associated with the requested topic.
# %d forces integer interpolation for topic/limit; direction is one of
# the two literals above, so the interpolated SQL stays safe.
connection.run_sql("""
    SELECT id, stemmed_body, body, title, topics
    FROM post
    WHERE question_type <> 4 AND topics[%d] > 0.5
    ORDER BY topics[%d] %s
    LIMIT %d
""" % (args.topic, args.topic, direction, args.number_of_posts))
posts = connection.results()

for post in posts:
    Post.print(post[2], post[3], post[0])
import connection
import html
import re
from random import shuffle

# ANSI escape sequences for terminal highlighting.
OKGREEN = '\033[92m'
OKBLUE = '\033[94m'
BOLD = '\033[1m'
ENDC = '\033[0m'

regex_tags = re.compile(r'(<!--.*?-->|<[^>]*>)')       # HTML tags/comments
regex_dbunit = re.compile(r'(dbunit)', re.IGNORECASE)  # term to highlight

connection.run_sql('select body, id, title from post where question_type is null and post_type_id = 1')
posts = connection.results()

connection.run_sql('select count(id) as conta from post where question_type is null and post_type_id = 1')
conta = connection.results()[0][0]

# Present unclassified questions in random order.
shuffle(posts)
for post in posts:
    title = post[2]
    # Strip markup, decode entities, then highlight the "dbunit" matches.
    cleaned = html.unescape(regex_tags.sub('', post[0]))
    body = regex_dbunit.sub(OKBLUE + BOLD + r'\1' + ENDC, cleaned)
    conta -= 1
    print (OKGREEN + BOLD + title + ENDC + "\n")
    print (body)
    print ("\nFaltam classificar %s posts\n" % conta)