def main():
    # Connect to the local MySQL "boletin_oficial" database and to the graph store.
    conn = db_utils.connect('localhost', 'root', 'boletin_oficial')
    g = Graph()
    # EAFP: look up the information-source vertex first; create it only on a miss.
    try:
        fuente = list(
            g.fuentes_de_informacion.index.lookup(
                nombre="DineroYPolitica.org"))[0]
    except IndexError:
        fuente = g.fuentes_de_informacion.create(nombre="DineroYPolitica.org")
    for r in db_utils.query_db(conn.cursor(), QUERY):
        # XXX TODO -- still to model:
        # political parties
        # persons (natural, legal) -- check how CUITs/DNIs fit; providers table
        # donations
        # candidate lists?
        # NOTE(review): articulo/dependencia/puesto/persona are never assigned
        # in this draft, so this LOG call would raise NameError -- the loop
        # body is still TODO (compare with the completed variant elsewhere in
        # this file).
        LOG.info('%r - %r - %r %r' % (
            articulo,
            dependencia,
            puesto,
            persona,
        ))
def main():
    """Mirror article/appointment rows from MySQL into the graph database.

    Each row carries the columns: articulo_id, articulo_texto,
    dependencia_id, dependencia_nombre, per_apellido, per_boletines,
    per_cuit, per_dni, per_domicilio_especial, per_estado_civil, per_id,
    per_nombre, per_nya, per_prefijo, per_sufijo, per_titulo, puesto_id,
    puesto_nombre.
    """
    conn = db_utils.connect('localhost', 'root', 'boletin_oficial')
    g = Graph()

    def lookup_or_create(proxy, key, **attrs):
        # EAFP: return the first vertex indexed under ``id=key`` or create
        # a fresh one carrying ``attrs``.
        try:
            return list(proxy.index.lookup(id=key))[0]
        except IndexError:
            return proxy.create(id=key, **attrs)

    for row in db_utils.query_db(conn.cursor(), QUERY):
        # Articles are always inserted anew; the other vertices are
        # de-duplicated through their index.
        articulo = g.articulos.create(
            id=row['articulo_id'],
            texto=row['articulo_texto'].decode('utf-8'))
        dependencia = lookup_or_create(
            g.dependencias, row['dependencia_id'],
            nombre=row['dependencia_nombre'].decode('utf-8'))
        puesto = lookup_or_create(
            g.puestos, row['puesto_id'],
            nombre=row['puesto_nombre'].decode('utf-8'))
        persona = lookup_or_create(
            g.personas, row['per_id'],
            dni=row['per_dni'],
            nombre_y_apellido=row['per_nya'].decode('utf-8'))
        # Edges: person->dependency appointment, dependency->post staffing.
        g.nombramientos.create(persona, dependencia,
                               {'puesto_id': row['puesto_id']})
        g.plantel.create(dependencia, puesto, {'persona_id': row['per_id']})
        LOG.info('%r - %r - %r %r' % (articulo, dependencia, puesto, persona,))
def interval():
    """Generate a word cloud from all stored cleaned tweet texts.

    Runs once per iteration of the surrounding scheduler; the rendered
    image is written to ``wc.png`` (presumably the ``interval_file_name``
    mentioned in the original note -- confirm against the caller).

    Fixes: the docstring was misplaced after the code (a no-op string
    expression) and the function ended in a dead ``pass``.
    """
    client = db_utils.connect()
    # Concatenate every stored cleaned_text into one corpus string.
    data = " ".join([
        item['cleaned_text']
        for item in client.tweeter_db.updates.find(
            {}, {'cleaned_text': 1, '_id': 0})
    ])
    create_word_cloud(data, "wc.png")
def __insert_new_user(user_id, date):
    """Create a user document for (user_id, date) with no fetched events yet.

    Fix: every sibling helper in this module releases its connection via
    ``db_utils.close(client)``; this one leaked it.
    """
    client = db_utils.connect()
    db = client[skill_config.DB_NAME]
    db.users.insert_one({
        'user_id': user_id,
        'date': date,
        'fetched_events': []
    })
    db_utils.close(client)
def load_log (path, filename, site_id, ip_addr):
    """Load and parse the csv log files, and insert them into the
    circuit and power_reading tables.

    Two types of log files:
      (1) A Main Circuit log file -- column headers in MAIN_LOG list
      (2) A Regular Circuit log file -- REGR_LOG
    """
    try:
        file_obj = open(os.path.join(path, filename), "r")
        data = file_obj.read()
        file_obj.close()
        # memoize circuit pks here, to avoid calling get_or_create_circuit()
        # once per data line
        circuit_pk_list = {}
        data_dicts = []  # list of dicts to bulk insert
        for i, line in enumerate(data.splitlines()):
            log_data = line.split(',')
            if len(log_data) == MAIN_LEN or len(log_data) == REGR_LEN:
                # ignore the csv file header line
                if log_data[0] != """Time Stamp""":
                    # get the circuit id (circuit table pk)
                    machine_id = log_data[18]
                    circuit_id = '-'.join([machine_id, site_id, ip_addr])
                    if not circuit_pk_list.has_key(circuit_id):
                        circuit_pk = get_or_create_circuit(
                            machine_id, site_id, ip_addr,
                            (len(log_data) == len(MAIN_LOG)))
                        circuit_pk_list[circuit_id] = circuit_pk
                    circuit_pk = circuit_pk_list[circuit_id]
                    line_dict = parse_log_line (circuit_pk, log_data)
                    # NOTE(review): ``None in line_dict`` tests the dict's
                    # *keys*; if parse_log_line marks bad fields as None
                    # *values* this should be ``None in line_dict.values()``
                    # -- confirm against parse_log_line.
                    if None in line_dict:
                        # BUG FIX: str.join() raises TypeError on the int
                        # line index ``i``; convert it to str explicitly.
                        print >> sys.stderr, ' '.join(
                            ["Error: bad content line", str(i), line,
                             "from file", os.path.join(path, filename)])
                    else:
                        data_dicts.append( line_dict )
        if len(data_dicts) > 0:
            conn = connect(DBNAME, DBUSER, PWD)
            if conn:
                insert (conn, 'power_reading',
                        columns=data_dicts[0].keys(),
                        inserts=data_dicts,
                        close_conn=True)
    except IOError:
        print >> sys.stderr, "Error: could not open", os.path.join(path, filename)
def main():
    """Entry point: parse CLI args, dispatch on args.command, report timing."""
    start_time = time.time()
    parser = get_parser()
    args = parser.parse_args()
    db_connection = db_utils.connect(args)

    if args.command == 'define_study_timeframe':
        study_timeframe_id = define_study_timeframe(
            db_connection, args.study_timeframe_name,
            args.study_timeframe_description, args.study_start_year,
            args.years_per_period, args.n_periods)
        if study_timeframe_id:
            print("Created study timeframe. id=", study_timeframe_id)
        return
    elif args.command == 'sample_timeseries':
        print('got sample_timeseries')
        return

    # NOTE(review): these locals are currently unused -- they look like
    # scaffolding for the sampling step still to be written.
    demand_scenario = args.demand_scenario
    month_sampling_frequency = args.month_sampling_frequency
    start_month = args.start_month
    hour_sampling_frequency = args.hour_sampling_frequency
    start_hour = args.start_hour

    # Developer notes on the relevant schema (kept from the original):
    #   demand_scenario
    #   demand_timeseries (ref raw_timepoint)
    #   study_timeframe, period, period_all_timeseries
    #   raw_timeseries, raw_timepoint
    #
    #   SELECT * FROM switch.period
    #   JOIN switch.period_all_timeseries USING (period_id)
    #   JOIN switch.raw_timepoint USING (raw_timeseries_id)
    #   JOIN switch.demand_timeseries USING (raw_timepoint_id)
    #   WHERE demand_scenario_id = 31 and period.study_timeframe_id = 1
    #   limit 10;
    #
    #   INSERT INTO time_sample(
    #       time_sample_id, study_timeframe_id, name, method, description)
    #   VALUES (?, ?, ?, ?, ?);

    # FIX: parameterized query instead of str.format() -- the scenario id
    # comes from the command line and must not be interpolated into SQL.
    sql = ("SELECT * FROM switch.demand_scenario "
           "WHERE demand_scenario_id = %s")
    print(sql, args.demand_scenario)
    df = pd.read_sql(sql, db_connection, params=(args.demand_scenario,))
    print("demand_scenario: ", df)

    end_time = time.time()
    print('\nScript ran in %s seconds.' % (end_time - start_time))
def __get_number_date_events(event_date):
    """Return how many ephemeris documents exist for ``event_date``."""
    client = db_utils.connect()
    count = client[skill_config.DB_NAME].ephemeris.count_documents(
        {'date': event_date})
    db_utils.close(client)
    return count
def __user_exists(user_id, date):
    """True iff exactly one user document matches (user_id, date)."""
    client = db_utils.connect()
    matches = client[skill_config.DB_NAME].users.count_documents(
        {'user_id': user_id, 'date': date})
    db_utils.close(client)
    return matches == 1
def __get_already_fetched_events(user_id, date):
    """Return the user's fetched_events list, or [] when no document matches.

    Fix: the original indexed ``res['fetched_events']`` unconditionally,
    which raises TypeError when find_one() returns None (unknown
    user_id/date pair).
    """
    client = db_utils.connect()
    db = client[skill_config.DB_NAME]
    res = db.users.find_one({
        'user_id': user_id,
        'date': date
    })
    db_utils.close(client)
    return res['fetched_events'] if res is not None else []
def key_word_cloud(keyword='lockdown'):
    """Generate a word cloud of today's tweets mentioning ``keyword``.

    The keyword itself is stripped from the (lower-cased) corpus so the
    cloud shows the surrounding vocabulary instead of the query term.
    The image is saved as ``<keyword><YYYYMMDD>.png``.

    Fix: removed the dead trailing ``pass`` statement.
    """
    # Current date in the YYYYMMDD prefix format used by the tmstamp field.
    dat_for_filter = time.strftime("%Y%m%d")
    client = db_utils.connect()
    # Case-insensitive keyword match restricted to today's tweets.
    data = " ".join([
        item['cleaned_text']
        for item in client.tweeter_db.news_tweets.find(
            {'$and': [
                {'cleaned_text': {'$regex': keyword, '$options': 'i'}},
                {'tmstamp': {'$regex': '^' + dat_for_filter + '.*'}}]},
            {'cleaned_text': 1, '_id': 0})
    ])
    client = None  # drop the reference; this module never closes explicitly
    # Drop the keyword itself; lower() makes the replace effectively
    # case-insensitive for lowercase keywords.
    data = data.lower().replace(keyword, "")
    create_word_cloud(data, keyword + dat_for_filter + ".png")
def __reset_user_fetched_events(user_id, date):
    """Clear the fetched_events list on the matching user document."""
    client = db_utils.connect()
    users = client[skill_config.DB_NAME].users
    selector = {'user_id': user_id, 'date': date}
    users.find_one_and_update(selector, {'$set': {'fetched_events': []}})
    db_utils.close(client)
def __add_user_fetched_event(user_id, date, event_id):
    """Append event_id to the matching user's fetched_events list."""
    client = db_utils.connect()
    users = client[skill_config.DB_NAME].users
    selector = {'user_id': user_id, 'date': date}
    change = {'$push': {'fetched_events': event_id}}
    users.find_one_and_update(selector, change)
    db_utils.close(client)
def main():
    """Interactive DB shell: connect to the named database and run a REPL."""
    # Guard clause: require exactly one CLI argument (the database name).
    if len(sys.argv) != 2:
        print("Error: Please enter a database name.")
        return
    db.connect(sys.argv[1])
    if db.CON is None:
        print('Could not connect.')
        return
    print(f"""Successfully connected to {db.NAME}\nType 'help' for more information.\n Make sure to call 'init' if this is a new database""")
    # Dispatch table for the zero-argument built-in commands.
    builtins_table = {
        'help': dbhelp,
        'dbinfo': db.info,
        'init': db.make_db,
        'update': db.update,
    }
    while True:
        # Read and parse user input.
        user_in = input('>')
        print('\n')
        pieces = user_in.split(maxsplit=1)
        command = pieces[0].strip()
        if command == 'quit':
            break
        handler = builtins_table.get(command)
        if handler is not None:
            handler()
        else:
            # Anything else is treated as a query for the parser.
            print_result(command, parse(user_in))
def main():
    # Connect to the local MySQL "boletin_oficial" database and to the graph store.
    conn = db_utils.connect('localhost', 'root', 'boletin_oficial')
    g = Graph()
    # EAFP: look up the information-source vertex first; create it only on a miss.
    try:
        fuente = list(g.fuentes_de_informacion.index.lookup(nombre="DineroYPolitica.org"))[0]
    except IndexError:
        fuente = g.fuentes_de_informacion.create(nombre="DineroYPolitica.org")
    for r in db_utils.query_db(conn.cursor(), QUERY):
        # XXX TODO -- still to model:
        # political parties
        # persons (natural, legal) -- check how CUITs/DNIs fit; providers table
        # donations
        # candidate lists?
        # NOTE(review): articulo/dependencia/puesto/persona are never assigned
        # in this draft, so this LOG call would raise NameError -- the loop
        # body is still TODO (compare with the completed variant elsewhere in
        # this file).
        LOG.info('%r - %r - %r %r' % (articulo, dependencia, puesto, persona,))
def main():
    """Load appointment rows from the boletin_oficial MySQL database and
    replicate them as vertices and edges in the graph store.

    Row keys: articulo_id, articulo_texto, dependencia_id,
    dependencia_nombre, per_apellido, per_boletines, per_cuit, per_dni,
    per_domicilio_especial, per_estado_civil, per_id, per_nombre, per_nya,
    per_prefijo, per_sufijo, per_titulo, puesto_id, puesto_nombre.
    """
    conn = db_utils.connect('localhost', 'root', 'boletin_oficial')
    g = Graph()

    def fetch_or_add(collection, **attrs):
        # Look the vertex up by id in the collection's index; create it
        # with the full attribute set on a miss.
        hits = list(collection.index.lookup(id=attrs['id']))
        return hits[0] if hits else collection.create(**attrs)

    for row in db_utils.query_db(conn.cursor(), QUERY):
        # Article vertices are created unconditionally.
        articulo = g.articulos.create(
            id=row['articulo_id'],
            texto=row['articulo_texto'].decode('utf-8'))
        dependencia = fetch_or_add(
            g.dependencias,
            id=row['dependencia_id'],
            nombre=row['dependencia_nombre'].decode('utf-8'))
        puesto = fetch_or_add(
            g.puestos,
            id=row['puesto_id'],
            nombre=row['puesto_nombre'].decode('utf-8'))
        persona = fetch_or_add(
            g.personas,
            id=row['per_id'],
            dni=row['per_dni'],
            nombre_y_apellido=row['per_nya'].decode('utf-8'))
        # Appointment and staffing edges.
        g.nombramientos.create(persona, dependencia,
                               {'puesto_id': row['puesto_id']})
        g.plantel.create(dependencia, puesto, {'persona_id': row['per_id']})
        LOG.info('%r - %r - %r %r' % (articulo, dependencia, puesto, persona,))
def __get_random_event_not_fetched(date, already_fetched):
    """Pick one random ephemeris event for ``date`` whose _id is not in
    ``already_fetched``.

    Raises IndexError when every event for the date was already fetched
    (the sampled cursor is empty).
    """
    client = db_utils.connect()
    pipeline = [
        {'$match': {'date': date, '_id': {'$nin': already_fetched}}},
        {'$sample': {'size': 1}},
    ]
    cursor = client[skill_config.DB_NAME].ephemeris.aggregate(pipeline)
    db_utils.close(client)
    return list(cursor)[0]
def get_or_create_circuit (machine_id, site_id, ip_addr, is_main=False):
    """Lookup the machine_id, site_id and ip_addr in the circuit table
    and return its pk, creating a new entry only if the combination
    doesn't already exist.  Returns None when no DB connection could be
    established or the row cannot be found after insertion.

    Fix: the original recursed unconditionally after the insert, which
    loops forever if the insert silently fails; we now re-run the lookup
    once instead.
    """
    conn = connect(DBNAME, DBUSER, PWD)
    if not conn:
        # The original fell through and returned None implicitly here.
        return None
    circuit_data = {'machine_id': machine_id, 'site_id': site_id,
                    'ip_addr': ip_addr, 'main_circuit': is_main}
    query = """SELECT pk FROM circuit
               WHERE machine_id = %(machine_id)s
               and site_id = %(site_id)s
               and ip_addr = %(ip_addr)s"""
    res = search (conn, query, circuit_data)
    if res is not None and len(res) > 0:
        return res[0][0]
    insert (conn, 'circuit', columns=circuit_data.keys(),
            inserts=[circuit_data])
    # Single retry of the lookup for the freshly inserted row.
    res = search (conn, query, circuit_data)
    if res is not None and len(res) > 0:
        return res[0][0]
    return None
from datetime import datetime
import json
import sqlalchemy
from sqlalchemy.orm import sessionmaker, relationship
from conf import POSTGRES_DB, POSTGRES_HOST, POSTGRES_PASSWORD, POSTGRES_PORT, POSTGRES_USER
from conf import get_logger
from db_utils import connect, get_id, check_student_at_course_session
from rmq_utils import get_channel

logger = get_logger('checker')

""" Create a session with the Postgre database """
# connect() returns the engine/connection plus reflected metadata; the
# session is bound to it for the lifetime of this worker process.
con, db_meta = connect(user=POSTGRES_USER, password=POSTGRES_PASSWORD, database_name=POSTGRES_DB, host=POSTGRES_HOST, port=POSTGRES_PORT)
Session = sessionmaker(bind=con)
session = Session()

""" Create a channel to RabbitMQ """
channel = get_channel()

""" Message handler """
def message_handler(ch, method, properties, body):
    # RabbitMQ consumer callback: decode the JSON body and dispatch on its
    # 'task' field.
    msg = json.loads(body)
    task = msg.get('task')
    if task == 'best_labels':
        """Save the best labels into the attendance table"""
        # NOTE(review): the handler body appears truncated in this view --
        # only the label extraction is visible here.
        labels: list = msg.get('labels')
# This is the example of main program file which imports entities, # connects to the database, drops/creates specified tables # and populate some data to the database from pony.orm import * # or just import db_session, etc. import all_entities # This command make sure that all entities are imported from base_entities import db # Will bind this database from db_settings import current_settings # binding params db.bind(*current_settings['args'], **current_settings['kwargs']) from db_utils import connect from db_loading import populate_database if __name__ == '__main__': sql_debug(True) connect(db, drop_and_create='ALL') # drop_and_create=['Topic', 'Comment']) populate_database()
def get_all_data():
    """Return per-day covid case counts as [confirmed, recovered, deceased].

    Each element is a list of documents shaped like
    ``{'_id': 'dd/mm/YYYY', 'count': int, 'date': datetime}`` sorted
    chronologically.

    Fix: the three aggregation pipelines were copy-pasted and differed
    only in the ``currentstatus`` value -- they are now built by one
    helper.
    """

    def _daily_counts_pipeline(status):
        # One pipeline per currentstatus value.
        return [
            # Ignore rows with no announcement date.
            {'$match': {'$and': [
                {'dateannounced': {'$ne': ""}},
                {'currentstatus': status}]}},
            # Count cases per announcement day.
            {'$group': {'_id': '$dateannounced', 'count': {'$sum': 1}}},
            # Parse the dd/mm/YYYY string so we can sort chronologically.
            {'$project': {'dateannounced': 1, 'count': 1,
                          'date': {'$dateFromString': {
                              'dateString': '$_id',
                              'format': "%d/%m/%Y"}}}},
            {'$sort': {'date': 1}},
        ]

    client = db_utils.connect()
    raw = client.covid_db.raw_data
    data_confirmed = list(raw.aggregate(_daily_counts_pipeline('Hospitalized')))
    data_recovered = list(raw.aggregate(_daily_counts_pipeline('Recovered')))
    data_deceased = list(raw.aggregate(_daily_counts_pipeline('Deceased')))

    # To get hospitalized data of all the states for the current date:
    # agg4 = [{'$match': {'$and': [{'dateannounced': t_date},
    #                              {'currentstatus': 'Hospitalized'}]}},
    #         {'$group': {'_id': '$detectedstate', 'count': {'$sum': 1}}},
    #         {'$sort': {'count': -1}}]
    # data_statewise = list(coln.aggregate(agg4))
    client = None
    return [data_confirmed, data_recovered, data_deceased]
def connect(cass_ip, cass_kp):
    # Thin wrapper: delegate connection setup (host/IP and keyspace) to the
    # underlying db module.  Intentionally returns None.
    db.connect(cass_ip, cass_kp)
def main():
    """Summarize today's 'lockdown' tweets and render the summary image.

    The corpus is summarized twice -- once space-joined and once
    sentence-joined ('. ') -- and the two summaries are concatenated
    before rendering.

    Fix: the original executed the identical Mongo query twice (the two
    appends differed only in the join separator); we now fetch once.
    """
    client = db_utils.connect()
    # Today's date in the YYYYMMDD prefix format used by tmstamp.
    dat_for_filter = time.strftime("%Y%m%d")
    query = {'$and': [
        {'cleaned_text': {'$regex': 'lockdown', '$options': 'i'}},
        {'tmstamp': {'$regex': '^' + dat_for_filter + '.*'}}]}
    projection = {'cleaned_text': 1, '_id': 0}
    texts = [item['cleaned_text']
             for item in client.tweeter_db.news_tweets.find(query, projection)]
    client = None
    data = [" ".join(texts), ". ".join(texts)]

    summary_text = ""
    if len(data) > 0:
        for text in data:
            # 1 Create the word frequency table
            freq_table = _create_frequency_table(text)
            # 2 Tokenize the sentences (sent_tokenize builds the array).
            sentences = sent_tokenize(text)
            # 3 Score the sentences against the frequency table.
            sentence_scores = _score_sentences(sentences, freq_table)
            # 4 Find the threshold
            threshold = _find_average_score(sentence_scores)
            # 5 Generate the summary, keeping sentences above 1.5x average.
            summary = _generate_summary(sentences, sentence_scores,
                                        1.5 * threshold)
            summary_text = summary_text + " " + summary
    #print(summary)
    print(
        f"lenght of the text: {len(summary_text)} complete summary : {summary_text}"
    )
    PIL_example.generate_image(summary_text)
def pie_chart():
    """Render a pie chart of state-wise hospitalized counts for the current
    date (module-global ``t_date`` -- TODO confirm its format matches
    ``dateannounced``) and save it under ``img_dir`` as
    ``Confirmed_statewise.png``.  Returns "" when there is no data."""
    # Group today's hospitalized rows by state, largest count first.
    agg4 = [{
        '$match': {
            '$and': [{
                'dateannounced': t_date
            }, {
                'currentstatus': 'Hospitalized'
            }]
        }
    }, {
        '$group': {
            '_id': '$detectedstate',
            'count': {
                '$sum': 1
            }
        }
    }, {
        '$sort': {
            'count': -1
        }
    }]
    client = db_utils.connect()
    data_confirmed = list(client.covid_db.raw_data.aggregate(agg4))
    client = None
    if len(data_confirmed) == 0:
        # Nothing announced for today -- skip plotting.
        return ""
    x = [k['_id'] for k in data_confirmed]    # state names
    y = [k['count'] for k in data_confirmed]  # per-state counts
    #plt
    #fig = plt.figure(figsize=(30,18))
    #ax1 = fig.add_axes([0,0,1,1])
    #rect - A 4-length sequence of [left, bottom, width, height] quantities.
    fig, ax1 = plt.subplots(figsize=(30, 18), subplot_kw=dict(aspect="equal"))
    ax1.axis('equal')
    states = x  #['C', 'C++', 'Java', 'Python', 'PHP']
    counts = y  #[23,17,35,29,12]
    # Label each slice as "State (count)".
    states = [f"{x[i]} ({y[i]})" for i in range(len(x))]
    # explode 1st slice (the state with the highest count)
    explode = [0] * (len(x) - 1)
    explode.insert(0, 0.1)
    explode = tuple(explode)
    wedges, texts, autotexts = ax1.pie(
        counts,
        explode=explode,
        labels=states,
        shadow=False,
        autopct='%1.2f%%',
        rotatelabels=True)  #startangle=-40,wedgeprops=dict(width=0.5)
    ax1.set_title(
        f"Pie chart showing statewise infected counts for current date. The total patients count is {sum(counts)}"
    )
    #ax1.text(3, 8, 'boxed italics text in data coords', style='italic', bbox = {'facecolor': 'red'})
    #ax1.annotate('annotate', xy = (2, 1), xytext = (3, 4),arrowprops = dict(facecolor = 'black', shrink = 0.05))
    ax1.legend(wedges,
               states,
               title=f"States ({sum(counts)})",
               loc="upper right",
               bbox_to_anchor=(0.6, 0.1, 0.5, 1))
    #plt.show()
    plt.setp(autotexts, size=14, weight="bold")
    plt.savefig(img_dir + "Confirmed_statewise.png",
                facecolor='w',
                edgecolor='w')
def get_age_graph():
    """Plot an age-bucket histogram of hospitalized / recovered / deceased
    patients and save it as '<img_dir>Age wise distribution.png'.

    Fixes: narrowed the bare ``except:`` (it also swallowed
    KeyboardInterrupt/SystemExit and real bugs) and deduplicated the
    three copy-pasted fetches into one helper.
    """
    fig, ax = plt.subplots(figsize=(30, 18))

    def convert_ages_to_int(data):
        # Normalize free-form 'agebracket' strings to numeric ages.
        ages = []
        for h in data:
            try:
                ages.append(int(h['agebracket']))
            # Only conversion failures are expected here: ValueError for
            # non-numeric strings, TypeError for non-string/None values.
            except (ValueError, TypeError):
                bracket = h['agebracket']
                if bracket == "":
                    # Missing age: skip the record.
                    pass
                elif (len(bracket) > 1) and (bracket.find("-") > -1):
                    # Range like "20-29": use the midpoint.
                    bounds = [int(bracket.split("-")[0]),
                              int(bracket.split("-")[1])]
                    ages.append(mean(bounds))
        return ages

    client = db_utils.connect()

    def _ages_for(status):
        # Fetch agebracket fields for one currentstatus value.
        rows = list(client.covid_db.raw_data.find(
            {'$and': [{'dateannounced': {'$ne': ""}},
                      {'currentstatus': status}]},
            {'agebracket': 1, '_id': 0}))
        return convert_ages_to_int(rows)

    hosp_data = _ages_for('Hospitalized')
    reco_data = _ages_for('Recovered')
    deceas_data = _ages_for('Deceased')
    client = None

    # 5-year buckets from 0 to 120 inclusive.
    age_bucket = list(range(0, 125, 5))
    # hist (rather than plot/bar) bins the raw ages into the buckets.
    plt.hist(hosp_data, age_bucket, label='hospitalized', histtype='bar',
             rwidth=0.8)
    plt.hist(reco_data, age_bucket, label='recovered', histtype='bar',
             rwidth=0.8)
    plt.hist(deceas_data, age_bucket, label='dead', histtype='bar', rwidth=0.8)
    plt.xlabel("Age buckets with interval of 5.")
    plt.ylabel("Number of the patients")
    plt.title(
        "Histogram of number of age wise hospitalized, recovred and deceased covid patients."
    )
    plt.legend()
    #plt.show()
    plt.savefig(img_dir + "Age wise distribution.png",
                facecolor='w',
                edgecolor='w',
                orientation='portrait')