def store_email(email, box, email_owner):
    """Insert one email into the raw_data_2 table.

    Duplicate message_ids (psycopg2.IntegrityError) are skipped and
    logged to a side file instead of aborting the load.

    Args:
        email: parsed email object exposing message_id, thread_id, to,
            fr, cc, sent_at, labels, subject and body.
        box: mailbox name the email came from.
        email_owner: account the mailbox belongs to.
    """
    # Use nvarchar in case storing non-english data
    target, starred = False, False
    # TODO: make table name a variable
    store_e = '''insert into raw_data_2
                 (message_id, thread_id, to_email, from_email, cc, date,
                  starred, subject, body, sub_body, email_owner, box, target)
                 values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'''
    # In psycopg the placeholder must be %s even for ints, dates and
    # other non-string types.

    # Prep text for storage.
    body = cpm.clean_raw_txt(email.body)
    subject = cpm.clean_raw_txt(email.subject)
    sub_body = subject + ' ' + body

    # Mark the target column: 'Jeeves'-labelled starred mail is the
    # positive class; plain starred mail only sets the starred flag.
    if 'Jeeves' in email.labels and '\\Starred' in email.labels:
        target = starred = True
    elif '\\Starred' in email.labels:
        starred = True

    # Could apply executemany with query and list of values but still
    # need to do fetch first and apply changes.
    with connect_db() as db:
        try:
            db.execute(store_e, (email.message_id, email.thread_id, email.to,
                                 email.fr, email.cc, email.sent_at, starred,
                                 subject, body, sub_body, email_owner, box,
                                 target))
        except psycopg2.IntegrityError:
            # Row already exists (duplicate message_id): skip and log.
            with open('../not_needed/load_errors.txt', 'a') as f:
                # BUG FIX: Python 2 print statement -> print() function.
                print("Problem loading email")
                # BUG FIX: write() takes a single string, and `email_fr`
                # was an undefined name (should be email.fr).
                f.write('%s %s %s\n' % (email.message_id, email.fr, body))
def listuser(self):
    """Return every row of the `user` table as a JSON response."""
    db = config.connect_db()
    cur = db.cursor()
    cur.execute("select * from user")
    # BUG FIX: fetchall() must run before the cursor is closed; the
    # original closed the cursor first, which raises on DB-API drivers.
    rv = cur.fetchall()
    cur.close()
    return jsonify(rv)
def main(box, email_owner, date=None):
    """Ensure the raw_data_2 table exists, then load and report emails.

    Args:
        box: mailbox name to pull from.
        email_owner: account owning the mailbox.
        date: cutoff datetime; defaults to the current time at call
            time (not at import time - see bug fix below).
    """
    # BUG FIX: the original default `date=datetime.datetime.now()` is
    # evaluated once when the function is defined, freezing the
    # timestamp for every call. Resolve it per call instead.
    if date is None:
        date = datetime.datetime.now()

    table_name = 'raw_data_2'
    # [0] = column DDL, [1] = primary-key column (used by build_tables).
    table_data = {
        table_name: [
            "message_id varchar(255) not null, "
            "thread_id varchar(255), "
            "to_email text, "
            "from_email varchar(255), "
            "cc varchar(255), "
            "date timestamp, "
            "starred boolean, "
            "subject text, "
            "body text, "
            "sub_body text, "
            "email_owner varchar(255), "
            "box varchar(255), "
            "target boolean, "
            "PRIMARY KEY(message_id)",
            "message_id",
        ]
    }

    # Create the table only if it does not already exist.
    with connect_db() as db:
        db.execute('''select * from information_schema.tables
                      where table_name=%s''', (table_name,))
        if not bool(db.rowcount):
            build_tables(table_data)

    get_data(box, email_owner, date)
    print_table_size(table_name)
def main():
    """Connect the bot, join all unblocked rooms, and run until exit.

    State is saved and the database connection committed and closed even
    if the bot's main loop raises.
    """
    config.connect_db()
    config.load_all()

    my_bot = bot.Bot(config.auth["name"],
                     config.auth["password"],
                     config.auth["pm"])
    my_bot.start_time = start_time

    # Join every room that is not blocked.
    for room_name in config.Room.room_names:
        candidate = config.Room(room_name)
        if not candidate.blocked:
            my_bot.joinRoom(candidate.name)

    try:
        my_bot.main()
    finally:
        config.save_all()
        config.database.commit()
        config.disconnect_db()
def loadimg(self, id):
    """Return all photos for one location as a JSON list.

    Args:
        id: location_id to filter on (name kept for existing callers,
            although it shadows the builtin).
    """
    db = config.connect_db()
    cur = db.cursor()
    cur.execute("select photo from photo where location_id=%s", (id, ))
    # BUG FIX: fetch before closing the cursor; the original closed it
    # first, which raises on DB-API drivers.
    rv = cur.fetchall()
    cur.close()
    photos = [row[0] for row in rv]
    return jsonify(photos)
def prodUpdate(self, name, stock):
    """Persist a new stock level for the product with the given name."""
    self.name = name
    self.stock = stock
    c, conn = config.connect_db(self)
    # `?` placeholders: this connection uses qmark-style parameters.
    c.execute('''update products set stock = ? where name = ?''',
              (self.stock, self.name))
    conn.commit()
    conn.close()
def textsearch(self):
    """Rebuild the cached search_text column from distinct location names."""
    db = config.connect_db()
    cur = db.cursor()
    cur.execute("select DISTINCT(name) from location_search")
    location_name = [row[0] for row in cur.fetchall()]
    # NOTE(review): reads from `location_search` but writes to
    # `location_searchs` - confirm both table names are intended.
    # The update has no WHERE clause, so every row receives the same text.
    cur.execute("update location_searchs set search_text=%s",
                (str(location_name), ))
    db.commit()
    cur.close()
    return jsonify('true')
def init_huya():
    """Crawl Huya game categories and upsert (game, gid) pairs into `init`.

    Covers three top-level category trees: online PC games (100023),
    mobile games (100004) and single-player games (100002).
    """
    game_list = {}
    # One paged crawl per top-level category id (deduplicates the three
    # near-identical loops of the original).
    for game_id in (100023, 100004, 100002):
        _crawl_huya_category(game_id, game_list)

    db = connect_db()
    cursor = db.cursor()
    for game, value in game_list.items():
        # SECURITY FIX: parameterized query instead of % string
        # formatting (safe against quotes/injection in game names).
        cursor.execute("insert into init(game, huya) VALUES(%s, %s) "
                       "on DUPLICATE key update huya=%s",
                       (game, value, value))
    db.commit()
    db.close()


def _crawl_huya_category(game_id, game_list):
    """Page through one Huya category, adding gameFullName -> gid entries."""
    page = 1
    while True:
        # NOTE: huya_params is a shared module dict mutated in place,
        # exactly as in the original code.
        params = huya_params
        params['gameId'] = game_id
        params['page'] = page
        r = requests.get(huya_url, params, headers=headers)
        datas = r.json()['data']['datas']
        # An empty page means we've run off the end of the category.
        if not datas:
            break
        for j in datas:
            if j['gameFullName'] not in game_list:
                game_list[j['gameFullName']] = str(j['gid'])
        page += 1
def get_not_in_db(start, end):
    """Print Douyu games from the crawled range that are missing from `init`.

    Args:
        start, end: range bounds forwarded to get_part_douyu.
    """
    game_list = get_part_douyu(start, end)
    db = connect_db()
    cursor = db.cursor()
    cursor.execute("select gid, douyu from init where douyu is not null")
    # Use a set for O(1) membership tests instead of scanning a list.
    already_in = {row[1] for row in cursor.fetchall()}
    for key, game in game_list.items():
        if key not in already_in:
            print(game + ": " + key)
def init_bilibili():
    """Rebuild the `init` table from Bilibili live areas (parents 2 and 3)."""
    game_list = {}
    # One paged crawl per parent area id (deduplicates the two
    # near-identical loops of the original).
    for parent_id in (2, 3):
        _crawl_bilibili_parent(parent_id, game_list)

    db = connect_db()
    cursor = db.cursor()
    # Clear the table before inserting the fresh snapshot.
    cursor.execute('delete from init')
    for game, value in game_list.items():
        # SECURITY FIX: parameterized query instead of % string
        # formatting (safe against quotes/injection in area names).
        cursor.execute("insert into init(game,bilibili) VALUES(%s, %s)",
                       (game, value))
    db.commit()
    db.close()


def _crawl_bilibili_parent(parent_area_id, game_list):
    """Page through one parent area, adding area_name -> "<parent>_<area_id>"."""
    page = 1
    while True:
        # NOTE: bilibili_params is a shared module dict mutated in
        # place, exactly as in the original code.
        params = bilibili_params
        params['area_id'] = 0
        params['page'] = page
        params['parent_area_id'] = parent_area_id
        r = requests.get(bilibili_url, params, headers=headers)
        data = r.json()['data']
        # An empty page means we've run off the end.
        if not data:
            break
        for j in data:
            if j['area_name'] not in game_list:
                game_list[j['area_name']] = (str(parent_area_id) + "_"
                                             + str(j['area_id']))
        page += 1
def init_douyu():
    """Scan Douyu category ids 1..549 and upsert game categories into `init`."""
    game_list = {}
    for i in range(1, 550):
        r = requests.get(douyu_url + "2_" + str(i) + "/1", headers=headers)
        # Parse the response once instead of four separate r.json() calls.
        rooms = r.json()['data']['rl']
        if not rooms:
            continue
        first = rooms[0]
        # cid1 values 1, 9 and 15 are the game categories we keep.
        if first['cid1'] in (1, 9, 15):
            game_list[first['c2name']] = "2_" + str(i)

    db = connect_db()
    cursor = db.cursor()
    for game, value in game_list.items():
        # SECURITY FIX: parameterized query instead of % string
        # formatting (safe against quotes/injection in category names).
        cursor.execute("insert into init(game, douyu) VALUES(%s, %s) "
                       "on DUPLICATE key update douyu=%s",
                       (game, value, value))
    db.commit()
    db.close()
def generatechart(self):
    """Render a 7-day visitor-count line chart PNG per location.

    For every location with visits in the last seven days (inclusive of
    today), counts tracking_visitor rows per day and saves a plot named
    <location_id>.png under the images directory.
    """
    db = config.connect_db()
    cur = db.cursor()
    Current_Date = date.today()
    Start_Date = Current_Date + timedelta(days=-6)  # 7-day window incl. today
    delta = Current_Date - Start_Date  # NOTE(review): computed but never used
    # Locations that had at least one visit inside the window.
    cur.execute(
        "SELECT DISTINCT(location_id) as location_id FROM tracking_visitor WHERE date(date)>=%s and date(date)<=%s",
        (str(Start_Date.strftime("%Y-%m-%d")), str(Current_Date.strftime("%Y-%m-%d"))))
    for location in cur.fetchall():
        data = []   # per-day visit counts
        dates = []  # matching YYYY-MM-DD labels
        StartDate = Start_Date
        delta1 = timedelta(days=1)
        location_id = location[0]
        # Walk the window one day at a time.
        while StartDate <= Current_Date:
            # NOTE(review): relies on the driver's execute() returning
            # the number of matched rows (MySQLdb behavior, not part of
            # the DB-API spec) - confirm which driver backs connect_db.
            count = cur.execute(
                "SELECT * FROM tracking_visitor WHERE location_id=%s and date(date)=%s",
                (location_id, StartDate.strftime("%Y-%m-%d")))
            data.append(int(count))
            dates.append(StartDate.strftime("%Y-%m-%d"))
            StartDate += delta1
        days = (tuple(dates))
        y_pos = np.arange(len(days))
        # Minimal chart frame: hide the right and top spines.
        plt.rcParams['axes.spines.right'] = False
        plt.rcParams['axes.spines.top'] = False
        plt.figure(figsize=(10, 4))
        plt.plot(y_pos, data, color='r')
        # Lao-capable font for the localized title.
        plt.title("ສະຖິຕິ/Statistics", fontname="Phetsarath OT",
                  fontweight='bold', fontsize=20)
        plt.xticks(y_pos, days)
        plt.xticks(rotation=45)
        plt.savefig('/home/cbr/python/api/images/' + str(location_id) + '.png',
                    bbox_inches="tight")
        # plt.savefig('D:/Projectmobile/maplaos/api/images/'+str(location_id)+'.png',bbox_inches = "tight")
        plt.close()
    cur.close()
    return jsonify('true')
# Per-platform page/viewer caps passed to the travel_* crawlers.
douyu_limit = 5000
huya_limit = 40000


def thread_travel(id, item, type):
    """Accumulate one platform's crawled count into *item*.

    type 1 = bilibili, 2 = douyu, anything else = huya; *id* is the
    platform-specific category id stored in the init table.
    """
    if type == 1:
        # NOTE(review): bilibili_limit is not defined in this view -
        # presumably a module-level constant like the two above; confirm.
        item['bilibili'] += travel_bilibili(id, bilibili_limit)
    elif type == 2:
        item['douyu'] += travel_douyu(id, douyu_limit)
    else:
        item['huya'] += travel_huya(id, huya_limit)


pretime = time.time()
# Connect to the database.
db = connect_db()
db.autocommit(True)
cursor = db.cursor()
# Fetch each game's parameters; status 0 means "still being crawled",
# 1 means "no longer crawled".
# sql = "select bilibili, douyu, huya, game, gid, status from init where status <= 3 and bilibili is not null"
sql = "select bilibili, douyu, huya, game, gid, status from init where status = 0 and bilibili is not null"
cursor.execute(sql)
games = cursor.fetchall()
# # Fetch the games that are no longer crawled.
# sql = "select bilibili, douyu, huya, game, gid, status from init where status > 3"
# cursor.execute(sql)
# remain_games = cursor.fetchall()
# # Sample 6.25% of them for a "resurrection" retry.
# resurrection_list = random.sample(remain_games, k=int(len(remain_games) * 0.0625))
# print("本次共对" + str(len(resurrection_list)) + "个游戏做复活尝试:")
def create_index(table_name, idx_col):
    """Create index id_<table_name> on the given table column.

    Identifiers cannot be bound as query parameters, hence the string
    formatting; callers must pass trusted names only.
    """
    statement = '''create index id_%s on %s (%s);''' % (table_name,
                                                        table_name,
                                                        idx_col)
    with connect_db() as db:
        db.execute(statement)
class Processo(Base):
    """SQLAlchemy model for a lawsuit record (table tb_processo_tj)."""
    __tablename__ = 'tb_processo_tj'

    # Surrogate primary key backed by a dedicated sequence.
    id = db.Column(db.BigInteger, db.Sequence('id_processo_seq'), primary_key=True)
    siglaTribunal = db.Column(db.String(100))    # "sigla do tribunal" = court acronym
    grau = db.Column(db.String(100))             # "grau" = degree/instance
    numero = db.Column(db.String(100))           # case number
    dataAjuizamento = db.Column(db.DateTime())   # filing date
    classeProcessual = db.Column(db.Integer)     # procedural-class code
    codigoOrgao = db.Column(db.Integer)          # court-body code
    codigoMunicipioIBGE = db.Column(db.Integer)  # IBGE municipality code
    instancia = db.Column(db.String(100))
    codigoLocalidade = db.Column(db.String(100))
    # movimento = relationship('Movimento')

    def __repr__(self):
        return "<Processo(siglaTribunal='%s', grau='%s', numero='%s', dataAjuizamento='%s', classeProcessual='%s', codigoOrgao='%s', codigoMunicipioIBGE='%s', instancia='%s', codigoLocalidade='%s')>" % (
            self.siglaTribunal, self.grau, self.numero, self.dataAjuizamento,
            self.classeProcessual, self.codigoOrgao, self.codigoMunicipioIBGE,
            self.instancia, self.codigoLocalidade)


# Create the tables in the database (checkfirst skips existing tables).
engine = connect_db()
Processo.__table__.create(bind=engine, checkfirst=True)
Movimento.__table__.create(bind=engine, checkfirst=True)
def _load_data(table, cols):
    """Load an entire table into a pandas DataFrame.

    The table name is interpolated directly (identifiers cannot be
    bound as parameters), so callers must pass a trusted name.

    Args:
        table: table name to select from.
        cols: column labels for the resulting DataFrame.
    """
    query = '''SELECT * from %s''' % table
    with connect_db() as db:
        db.execute(query)
        rows = db.fetchall()
    return pandas.DataFrame(rows, columns=cols)
import datetime
from mongoengine import (Document, DynamicDocument, DateTimeField,
                         ReferenceField, SequenceField, StringField)
from config import connect_db
from app.users.models import User

# Connect to MongoDB at import time so the documents below are usable
# as soon as this module is imported.
connect_db()


class FormLink(Document):
    """A form definition created by a user."""
    fid = SequenceField()              # auto-incrementing form id
    name = StringField(max_length=50)  # display name, capped at 50 chars
    creator = ReferenceField(User, dbref=True)
    date_modified = DateTimeField(default=datetime.datetime.now)


class Thread(Document):
    """A thread of activity attached to one FormLink."""
    tid = SequenceField()  # auto-incrementing thread id
    formlink = ReferenceField(FormLink, dbref=True)
    date_modified = DateTimeField(default=datetime.datetime.now)


class FormData(DynamicDocument):
    """Schema-less form submission tied to a creator and a thread."""
    creator = ReferenceField(User, dbref=True)
    thread = ReferenceField(Thread, dbref=True)
    date_modified = DateTimeField(default=datetime.datetime.now)


class InboundData(DynamicDocument):
    """Schema-less inbound payload; only the modification time is fixed."""
    date_modified = DateTimeField(default=datetime.datetime.now)
def create_table(table_name, values):
    """Create a table from a name and a column-definition string.

    DDL identifiers cannot be parameterized, so the statement is built
    with string formatting; pass trusted input only.
    """
    ddl = '''create table %s(%s)''' % (table_name, values)
    with connect_db() as db:
        db.execute(ddl)
import os import json import logging import argparse from glob import glob from datetime import datetime import pandas as pd from sqlalchemy.orm import sessionmaker from model import Processo, Movimento from config import connect_db conn_db = connect_db() #establish connection Session = sessionmaker(bind=conn_db) session = Session() logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO) parser = argparse.ArgumentParser() parser.add_argument( '--segmento', '-sj', type=str, choices=[ 'eleitoral', 'estadual', 'federal', 'militar', 'trabalho', 'tribunal_superior' ], help='Escolher qual é o segmento da justica. Ex.: eleitoral, estadual, etc.' ) parser.add_argument(
def drop_table(table_name):
    """Drop the given table if it exists.

    The identifier is interpolated, not bound; pass trusted names only.
    """
    statement = '''DROP TABLE IF EXISTS %s;''' % table_name
    with connect_db() as db:
        db.execute(statement)
def print_table_size(table_name):
    """Print the row count of the given table.

    The identifier is interpolated, not bound; pass trusted names only.
    """
    with connect_db() as db:
        db.execute('''SELECT count(*) from %s''' % table_name)
        # BUG FIX: `print db.fetchone()` is Python 2 statement syntax;
        # sibling functions in this codebase use the print() function,
        # which is also valid on Python 2 with a single argument.
        print(db.fetchone())