Exemple #1
0
def main():
    """Load rows from the boletin_oficial database into the graph.

    NOTE(review): this snippet looks unfinished -- ``articulo``,
    ``dependencia``, ``puesto`` and ``persona`` are never assigned inside
    the loop, so the LOG.info call raises NameError on the first row.
    """
    conn = db_utils.connect('localhost', 'root', 'boletin_oficial')

    g = Graph()

    # Look up the information-source vertex, creating it on first run.
    try:
        fuente = list(
            g.fuentes_de_informacion.index.lookup(
                nombre="DineroYPolitica.org"))[0]
    except IndexError:
        fuente = g.fuentes_de_informacion.create(nombre="DineroYPolitica.org")

    for r in db_utils.query_db(conn.cursor(), QUERY):
        # XXX TODO
        # political parties
        # persons (natural and legal) -- decide how to handle CUITs/DNIs; providers table
        # donations
        # candidate lists?

        LOG.info('%r - %r - %r %r' % (
            articulo,
            dependencia,
            puesto,
            persona,
        ))
def main():
    """Import appointment rows from the boletin_oficial database into the
    graph, linking each persona to its dependencia and puesto."""
    conn = db_utils.connect('localhost', 'root', 'boletin_oficial')

    g = Graph()

    # Row columns:
    # ['articulo_id', 'articulo_texto', 'dependencia_id', 'dependencia_nombre',
    #  'per_apellido', 'per_boletines', 'per_cuit', 'per_dni',
    #  'per_domicilio_especial', 'per_estado_civil', 'per_id', 'per_nombre',
    #  'per_nya', 'per_prefijo', 'per_sufijo', 'per_titulo', 'puesto_id',
    #  'puesto_nombre']

    def first_or_create(proxy, vertex_id, make):
        """Return the vertex indexed by ``id``; call *make* lazily to
        create it when the index has no match."""
        try:
            return list(proxy.index.lookup(id=vertex_id))[0]
        except IndexError:
            return make()

    for row in db_utils.query_db(conn.cursor(), QUERY):
        articulo = g.articulos.create(
            id=row['articulo_id'],
            texto=row['articulo_texto'].decode('utf-8'))

        dependencia = first_or_create(
            g.dependencias, row['dependencia_id'],
            lambda: g.dependencias.create(
                id=row['dependencia_id'],
                nombre=row['dependencia_nombre'].decode('utf-8')))

        puesto = first_or_create(
            g.puestos, row['puesto_id'],
            lambda: g.puestos.create(
                id=row['puesto_id'],
                nombre=row['puesto_nombre'].decode('utf-8')))

        persona = first_or_create(
            g.personas, row['per_id'],
            lambda: g.personas.create(
                dni=row['per_dni'],
                nombre_y_apellido=row['per_nya'].decode('utf-8'),
                id=row['per_id']))

        g.nombramientos.create(persona, dependencia,
                               {'puesto_id': row['puesto_id']})
        g.plantel.create(dependencia, puesto, {'persona_id': row['per_id']})

        LOG.info('%r - %r - %r %r' % (articulo, dependencia, puesto, persona,))
def interval():
    """Regenerate the word cloud from all stored tweets.

    Meant to run after every fetch iteration: pulls the cleaned text of
    every document in ``tweeter_db.updates`` and renders it to the fixed
    file name ``wc.png``.

    Fixes over the original: the docstring was a no-op string placed
    *after* the work, and a dead trailing ``pass`` has been removed.
    """
    client = db_utils.connect()
    # Concatenate every cleaned tweet into one space-separated corpus.
    data = " ".join(item['cleaned_text']
                    for item in client.tweeter_db.updates.find(
                        {}, {'cleaned_text': 1, '_id': 0}))
    create_word_cloud(data, "wc.png")
def __insert_new_user(user_id, date):
    """Create a fresh user document for (user_id, date) with no fetched events.

    Closes the Mongo client when done -- every sibling helper in this
    module does; the original leaked the connection here.
    """
    client = db_utils.connect()
    db = client[skill_config.DB_NAME]

    db.users.insert_one({
        'user_id': user_id,
        'date': date,
        'fetched_events': []
    })

    db_utils.close(client)
def load_log(path, filename, site_id, ip_addr):
    """Load and parse a csv log file, and insert its rows into the circuit
    and power_reading tables.

    Two types of log files:
    (1) A Main Circuit log file    -- column headers in MAIN_LOG list
    (2) A Regular Circuit log file --                   REGR_LOG

    Bad lines are reported on stderr and skipped; an unreadable file is
    reported and nothing is inserted.

    Fixes over the original: Python-2-only ``has_key``/``print >>`` syntax
    replaced; the error message joined an int (``i``) with str.join, which
    raises TypeError; the file is now closed via ``with``.
    """
    full_path = os.path.join(path, filename)
    try:
        # Read the whole file up front; ensures the handle is closed even on error.
        with open(full_path, "r") as file_obj:
            data = file_obj.read()
    except IOError:
        print("Error: could not open", full_path, file=sys.stderr)
        return

    # memoize these here, to avoid calling get_or_create_circuit()
    # (one DB round-trip each) more than once per circuit
    circuit_pk_list = {}

    data_dicts = []  # list of dicts to bulk insert
    for i, line in enumerate(data.splitlines()):
        log_data = line.split(',')
        if len(log_data) not in (MAIN_LEN, REGR_LEN):
            continue

        # ignore the csv file header line
        if log_data[0] == "Time Stamp":
            continue

        # get the circuit id (circuit table pk)
        machine_id = log_data[18]
        circuit_id = '-'.join([machine_id, site_id, ip_addr])
        if circuit_id not in circuit_pk_list:
            circuit_pk_list[circuit_id] = get_or_create_circuit(
                machine_id, site_id, ip_addr,
                (len(log_data) == len(MAIN_LOG)))
        circuit_pk = circuit_pk_list[circuit_id]

        line_dict = parse_log_line(circuit_pk, log_data)
        # NOTE(review): ``None in line_dict`` tests the dict *keys*; if the
        # intent is to detect missing values it should be
        # ``None in line_dict.values()`` -- confirm parse_log_line's contract.
        if None in line_dict:
            # str(i): joining an int with str.join raised TypeError before.
            print(' '.join(["Error: bad content line", str(i), line,
                            "from file", full_path]),
                  file=sys.stderr)
        else:
            data_dicts.append(line_dict)

    if data_dicts:
        conn = connect(DBNAME, DBUSER, PWD)
        if conn:
            insert(conn, 'power_reading',
                   columns=data_dicts[0].keys(),
                   inserts=data_dicts,
                   close_conn=True)
def main():
    """Entry point: dispatch on ``args.command``, then (default path) fetch
    and print the requested demand scenario row."""
    start_time = time.time()
    parser = get_parser()
    args = parser.parse_args()
    db_connection = db_utils.connect(args)

    if args.command == 'define_study_timeframe':
        study_timeframe_id = define_study_timeframe(
            db_connection, args.study_timeframe_name,
            args.study_timeframe_description, args.study_start_year,
            args.years_per_period, args.n_periods)
        if study_timeframe_id:
            print("Created study timeframe. id=", study_timeframe_id)
        return
    elif args.command == 'sample_timeseries':
        print('got sample_timeseries')
        return

    # NOTE(review): read but not yet used below -- presumably for the
    # sampling logic sketched in the notes string; confirm before removing.
    demand_scenario = args.demand_scenario
    month_sampling_frequency = args.month_sampling_frequency
    start_month = args.start_month
    hour_sampling_frequency = args.hour_sampling_frequency
    start_hour = args.start_hour

    # Developer scratch notes on the schema and planned INSERT (kept verbatim).
    """
    demand_scenario
        demand_timeseries (ref raw_timepoint)
    study_timeframe
      period
      period_all_timeseries
        raw_timeseries
            raw_timepoint
    SELECT *
    FROM switch.period
        JOIN switch.period_all_timeseries USING (period_id)
        JOIN switch.raw_timepoint USING (raw_timeseries_id)
        JOIN switch.demand_timeseries USING (raw_timepoint_id)
    WHERE demand_scenario_id = 31
        and period.study_timeframe_id = 1
    limit 10;
    
    INSERT INTO time_sample(
            time_sample_id, study_timeframe_id, name, method, description)
    VALUES (?, ?, ?, ?, ?);

    """

    # Parameterized query instead of str.format: keeps the CLI-supplied id
    # out of the SQL text (injection-safe).  %s is the paramstyle for
    # psycopg2/Postgres, which the "switch" schema suggests -- confirm driver.
    sql = "SELECT * FROM switch.demand_scenario WHERE demand_scenario_id = %s"
    print(sql)

    df = pd.read_sql(sql, db_connection, params=(args.demand_scenario,))
    print("demand_scenario: ", df)

    end_time = time.time()

    print('\nScript ran in %s seconds.' % (end_time - start_time))
def __get_number_date_events(event_date):
    """Count how many ephemeris events are stored for *event_date*."""
    client = db_utils.connect()
    count = client[skill_config.DB_NAME].ephemeris.count_documents(
        {'date': event_date})
    db_utils.close(client)
    return count
def __user_exists(user_id, date):
    """Return True when exactly one user document matches (user_id, date)."""
    client = db_utils.connect()
    matches = client[skill_config.DB_NAME].users.count_documents(
        {'user_id': user_id, 'date': date})
    db_utils.close(client)
    return matches == 1
def __get_already_fetched_events(user_id, date):
    """Return the 'fetched_events' list stored for (user_id, date).

    NOTE(review): raises TypeError when no matching document exists
    (find_one returns None) -- callers presumably check __user_exists first.
    """
    client = db_utils.connect()
    user_doc = client[skill_config.DB_NAME].users.find_one(
        {'user_id': user_id, 'date': date})
    db_utils.close(client)
    return user_doc['fetched_events']
def key_word_cloud(keyword='lockdown'):
    """Generate today's word cloud for tweets matching *keyword*.

    Pulls the cleaned text of every news tweet from today whose text
    contains *keyword* (case-insensitive), strips the keyword out of the
    corpus, and renders the cloud to ``<keyword><yyyymmdd>.png``.

    Fix over the original: dead trailing ``pass`` removed.
    """
    # Current date in the YYYYMMDD format used by the tmstamp field.
    dat_for_filter = time.strftime("%Y%m%d")
    client = db_utils.connect()
    data = " ".join(item['cleaned_text']
                    for item in client.tweeter_db.news_tweets.find(
                        {'$and': [
                            {'cleaned_text': {'$regex': keyword, '$options': 'i'}},
                            {'tmstamp': {'$regex': '^' + dat_for_filter + '.*'}},
                        ]},
                        {'cleaned_text': 1, '_id': 0}))
    client = None  # drop the connection reference
    # Drop the keyword itself so it doesn't dominate the cloud.
    # NOTE(review): only the exact (post-lowercase) form of *keyword* is
    # removed; a mixed-case *keyword* argument would survive -- confirm intent.
    data = data.lower().replace(keyword, "")
    create_word_cloud(data, keyword + dat_for_filter + ".png")
def __reset_user_fetched_events(user_id, date):
    """Clear the 'fetched_events' list on the (user_id, date) document."""
    client = db_utils.connect()
    client[skill_config.DB_NAME].users.find_one_and_update(
        {'user_id': user_id, 'date': date},
        {'$set': {'fetched_events': []}})
    db_utils.close(client)
def __add_user_fetched_event(user_id, date, event_id):
    """Append *event_id* to the (user_id, date) document's fetched_events."""
    client = db_utils.connect()
    client[skill_config.DB_NAME].users.find_one_and_update(
        {'user_id': user_id, 'date': date},
        {'$push': {'fetched_events': event_id}})
    db_utils.close(client)
Exemple #13
0
def main():
    """Minimal interactive shell: connect to the database named in argv[1]
    and dispatch commands read from stdin until 'quit'.

    Fix over the original: blank input crashed with IndexError on
    ``cmd[0]``; it now just re-prompts.
    """
    # check if valid input
    if len(sys.argv) != 2:
        print("Error: Please enter a database name.")
        return

    # get database name and connect to it
    db.connect(sys.argv[1])
    if db.CON is None:
        print('Could not connect.')
        return

    # print init message
    print(f"""Successfully connected to {db.NAME}\nType 'help' for more information.\n Make sure to call 'init' if this is a new database""")
    while True:
        # read and parse user input
        user_in = input('>')
        print('\n')
        cmd = user_in.split(maxsplit=1)
        if not cmd:
            # blank line: nothing to dispatch
            continue
        function = cmd[0].strip()

        if function == 'quit':
            break
        elif function == 'help':
            dbhelp()
        elif function == 'dbinfo':
            db.info()
        elif function == 'init':
            db.make_db()
        elif function == 'update':
            db.update()
        else:
            print_result(function, parse(user_in))
def main():
    """Load rows from the boletin_oficial database into the graph.

    NOTE(review): this snippet looks unfinished -- ``articulo``,
    ``dependencia``, ``puesto`` and ``persona`` are never assigned inside
    the loop, so the LOG.info call raises NameError on the first row.
    """
    conn = db_utils.connect('localhost', 'root', 'boletin_oficial')

    g = Graph()

    # Look up the information-source vertex, creating it on first run.
    try:
        fuente = list(g.fuentes_de_informacion.index.lookup(nombre="DineroYPolitica.org"))[0]
    except IndexError:
        fuente = g.fuentes_de_informacion.create(nombre="DineroYPolitica.org")
        
    for r in db_utils.query_db(conn.cursor(), QUERY):
        # XXX TODO
        # political parties
        # persons (natural and legal) -- decide how to handle CUITs/DNIs; providers table
        # donations
        # candidate lists?

        LOG.info('%r - %r - %r %r' % (articulo, dependencia, puesto, persona,))
Exemple #15
0
def main():
    """Import appointment rows from the boletin_oficial database into the
    graph, linking each persona to its dependencia and puesto."""
    conn = db_utils.connect('localhost', 'root', 'boletin_oficial')

    g = Graph()

    # ['articulo_id', 'articulo_texto', 'dependencia_id', 'dependencia_nombre', 'per_apellido', 'per_boletines', 'per_cuit', 'per_dni', 'per_domicilio_especial', 'per_estado_civil', 'per_id', 'per_nombre', 'per_nya', 'per_prefijo', 'per_sufijo', 'per_titulo', 'puesto_id', 'puesto_nombre']

    for r in db_utils.query_db(conn.cursor(), QUERY):

        # Articles are always created anew; the other vertices are
        # looked up by id first and only created on an index miss.
        articulo = g.articulos.create(
            id=r['articulo_id'], texto=r['articulo_texto'].decode('utf-8'))

        try:
            dependencia = list(
                g.dependencias.index.lookup(id=r['dependencia_id']))[0]
        except IndexError:
            dependencia = g.dependencias.create(
                id=r['dependencia_id'],
                nombre=r['dependencia_nombre'].decode('utf-8'))

        try:
            puesto = list(g.puestos.index.lookup(id=r['puesto_id']))[0]
        except IndexError:
            puesto = g.puestos.create(
                id=r['puesto_id'], nombre=r['puesto_nombre'].decode('utf-8'))

        try:
            persona = list(g.personas.index.lookup(id=r['per_id']))[0]
        except IndexError:
            persona = g.personas.create(
                dni=r['per_dni'],
                nombre_y_apellido=r['per_nya'].decode('utf-8'),
                id=r['per_id'])

        # Edges: persona --nombramiento--> dependencia, and
        # dependencia --plantel--> puesto, each tagged with the missing id.
        g.nombramientos.create(persona, dependencia,
                               {'puesto_id': r['puesto_id']})
        g.plantel.create(dependencia, puesto, {'persona_id': r['per_id']})

        LOG.info('%r - %r - %r %r' % (
            articulo,
            dependencia,
            puesto,
            persona,
        ))
def __get_random_event_not_fetched(date, already_fetched):
    """Pick one random ephemeris event for *date* whose _id is not in
    *already_fetched*.

    Raises IndexError when every event for the date was already fetched.
    """
    client = db_utils.connect()
    db = client[skill_config.DB_NAME]

    pipeline = [
        {'$match': {'date': date, '_id': {'$nin': already_fetched}}},
        {'$sample': {'size': 1}},
    ]
    cursor = db.ephemeris.aggregate(pipeline)

    # Same order as before: close the client, then drain the cursor.
    db_utils.close(client)

    return list(cursor)[0]
def get_or_create_circuit (machine_id, site_id, ip_addr, is_main=False):
    """Lookup the machine_id, site_id and ip_addr in the circuit table
    and return its pk, creating a new entry only if the combination
    doesn't already exist.

    NOTE(review): implicitly returns None when connect() fails -- callers
    should be prepared for that.
    """

    conn = connect(DBNAME, DBUSER, PWD)
    if conn:
        circuit_data = {'machine_id':machine_id,
                        'site_id':site_id,
                        'ip_addr':ip_addr,
                        'main_circuit':is_main}
        res = search (conn,
                      """SELECT pk FROM circuit WHERE
                      machine_id = %(machine_id)s and
                      site_id = %(site_id)s and
                      ip_addr = %(ip_addr)s""",
                      circuit_data)
        if res is not None and len(res) > 0:
            return res[0][0]
        else:
            insert (conn, 'circuit',
                    columns=circuit_data.keys(),
                    inserts=[circuit_data])
            # Recurse once to re-read the freshly inserted row's pk.
            # NOTE(review): the recursive call drops is_main; the SELECT
            # doesn't filter on it, so the pk returned is still correct,
            # but passing it through would be clearer -- confirm.
            return get_or_create_circuit (machine_id, site_id, ip_addr)
Exemple #18
0
from datetime import datetime
import json
import sqlalchemy
from sqlalchemy.orm import sessionmaker, relationship

from conf import POSTGRES_DB, POSTGRES_HOST, POSTGRES_PASSWORD, POSTGRES_PORT, POSTGRES_USER
from conf import get_logger
from db_utils import connect, get_id, check_student_at_course_session
from rmq_utils import get_channel

# Module-level wiring: logger, DB session and message-queue channel are
# created once at import time.
logger = get_logger('checker')
""" Create a session with the Postgre database """
con, db_meta = connect(user=POSTGRES_USER,
                       password=POSTGRES_PASSWORD,
                       database_name=POSTGRES_DB,
                       host=POSTGRES_HOST,
                       port=POSTGRES_PORT)

# SQLAlchemy session bound to the engine returned by connect().
Session = sessionmaker(bind=con)
session = Session()
""" Create a channel to RabbitMQ """
channel = get_channel()
""" Message handler """


def message_handler(ch, method, properties, body):
    """RabbitMQ consumer callback: parse the JSON body and dispatch on its
    'task' field.

    NOTE(review): the snippet appears truncated -- ``labels`` is bound but
    never used, and no other task values are handled.
    """
    msg = json.loads(body)
    task = msg.get('task')
    if task == 'best_labels':
        """Save the best labels into the attendance table"""
        labels: list = msg.get('labels')
Exemple #19
0
# This is the example of main program file which imports entities,
# connects to the database, drops/creates specified tables
# and populate some data to the database

from pony.orm import *  # or just import db_session, etc.
import all_entities  # This import makes sure that all entities are registered
from base_entities import db  # Will bind this database

from db_settings import current_settings  # binding params

# Bind the Pony ORM database object with the configured provider arguments.
db.bind(*current_settings['args'], **current_settings['kwargs'])

from db_utils import connect
from db_loading import populate_database

if __name__ == '__main__':
    # Echo the generated SQL while developing.
    sql_debug(True)
    # WARNING: drops and recreates every table before repopulating.
    connect(db, drop_and_create='ALL') # drop_and_create=['Topic', 'Comment'])
    populate_database()
Exemple #20
0
def get_all_data():
    """Return daily case counts as [confirmed, recovered, deceased].

    Each element is a list of ``{'_id': <dd/mm/yyyy string>, 'count': n,
    'date': <datetime>}`` documents, grouped by announcement date and
    sorted chronologically.

    Fix over the original: the three aggregation pipelines were identical
    except for the status string -- factored into one builder.
    """
    def status_by_date_pipeline(status):
        """Pipeline: count raw_data rows per dateannounced for *status*."""
        return [
            # Skip rows with no announcement date; keep only this status.
            {'$match': {'$and': [
                {'dateannounced': {'$ne': ""}},
                {'currentstatus': status},
            ]}},
            {'$group': {'_id': '$dateannounced', 'count': {'$sum': 1}}},
            # Parse the dd/mm/yyyy string so we can sort chronologically.
            {'$project': {'dateannounced': 1, 'count': 1,
                          'date': {'$dateFromString': {
                              'dateString': '$_id',
                              'format': "%d/%m/%Y"}}}},
            {'$sort': {'date': 1}},
        ]

    client = db_utils.connect()
    raw_data = client.covid_db.raw_data
    data_confirmed = list(raw_data.aggregate(status_by_date_pipeline('Hospitalized')))
    data_recovered = list(raw_data.aggregate(status_by_date_pipeline('Recovered')))
    data_deceased = list(raw_data.aggregate(status_by_date_pipeline('Deceased')))
    client = None  # drop the connection reference

    # (a statewise aggregation for the current date was prototyped here;
    # pie_chart() holds the live version)
    return [data_confirmed, data_recovered, data_deceased]
Exemple #21
0
def connect(cass_ip, cass_kp):
    """Thin wrapper: delegate to the module-level ``db`` object's connect().

    The ``cass_`` prefixes suggest a Cassandra host and keyspace -- confirm
    against the ``db`` module.
    """
    db.connect(cass_ip, cass_kp)
Exemple #22
0
def main():
    """Summarize today's 'lockdown' tweets and render the summary image.

    Fetches the matching tweets, builds two corpora (space-joined and
    sentence-joined), runs extractive summarization on each, and hands the
    combined summary to ``PIL_example.generate_image``.

    Fixes over the original: the identical query was issued twice (now
    fetched once and joined two ways) and the printed "lenght" typo is
    corrected.
    """
    client = db_utils.connect()
    # Current date in the YYYYMMDD format used by the tmstamp field.
    dat_for_filter = time.strftime("%Y%m%d")
    texts = [item['cleaned_text']
             for item in client.tweeter_db.news_tweets.find(
                 {'$and': [
                     {'cleaned_text': {'$regex': 'lockdown', '$options': 'i'}},
                     {'tmstamp': {'$regex': '^' + dat_for_filter + '.*'}},
                 ]},
                 {'cleaned_text': 1, '_id': 0})]
    client = None  # drop the connection reference

    # Two corpora from the same tweets: word-cloud-style and sentence-style.
    data = [" ".join(texts), ". ".join(texts)]

    summary_text = ""
    for text in data:
        # 1 Create the word frequency table
        freq_table = _create_frequency_table(text)

        # 2 Tokenize the sentences
        sentences = sent_tokenize(text)

        # 3 Score the sentences against the frequency table
        sentence_scores = _score_sentences(sentences, freq_table)

        # 4 Find the average-score threshold
        threshold = _find_average_score(sentence_scores)

        # 5 Keep sentences scoring at least 1.5x the average
        summary = _generate_summary(sentences, sentence_scores,
                                    1.5 * threshold)
        summary_text = summary_text + " " + summary

    print(
        f"length of the text: {len(summary_text)} complete summary : {summary_text}"
    )
    PIL_example.generate_image(summary_text)
Exemple #23
0
def pie_chart():
    """Render a statewise pie chart of today's confirmed (Hospitalized) cases.

    Aggregates current-date rows by detectedstate, draws a pie chart and
    saves it to ``<img_dir>/Confirmed_statewise.png``.  Returns "" when no
    data exists for the current date (implicitly None otherwise).

    Fixes over the original: a dead ``states = x`` assignment (immediately
    overwritten) is removed, and the figure is closed after saving so
    repeated calls don't accumulate open figures.
    """
    # Count today's Hospitalized rows per state, largest first.
    agg4 = [
        {'$match': {'$and': [{'dateannounced': t_date},
                             {'currentstatus': 'Hospitalized'}]}},
        {'$group': {'_id': '$detectedstate', 'count': {'$sum': 1}}},
        {'$sort': {'count': -1}},
    ]

    client = db_utils.connect()
    data_confirmed = list(client.covid_db.raw_data.aggregate(agg4))
    client = None  # drop the connection reference

    if not data_confirmed:
        return ""

    x = [k['_id'] for k in data_confirmed]
    y = [k['count'] for k in data_confirmed]

    fig, ax1 = plt.subplots(figsize=(30, 18), subplot_kw=dict(aspect="equal"))
    ax1.axis('equal')
    counts = y
    # Label each wedge "State (count)".
    states = [f"{x[i]} ({y[i]})" for i in range(len(x))]

    # explode the 1st (largest) slice
    explode = (0.1,) + (0,) * (len(x) - 1)

    wedges, texts, autotexts = ax1.pie(
        counts,
        explode=explode,
        labels=states,
        shadow=False,
        autopct='%1.2f%%',
        rotatelabels=True)
    ax1.set_title(
        f"Pie chart showing statewise infected counts for current date. The total patients count is {sum(counts)}"
    )
    ax1.legend(wedges,
               states,
               title=f"States ({sum(counts)})",
               loc="upper right",
               bbox_to_anchor=(0.6, 0.1, 0.5, 1))
    plt.setp(autotexts, size=14, weight="bold")
    plt.savefig(img_dir + "Confirmed_statewise.png",
                facecolor='w',
                edgecolor='w')
    plt.close(fig)
Exemple #24
0
def get_age_graph():
    """Plot age-bucket histograms of hospitalized/recovered/deceased patients
    and save the figure to ``<img_dir>/Age wise distribution.png``.

    Fixes over the original: the bare ``except:`` is narrowed (it also
    swallowed KeyboardInterrupt/SystemExit), the three duplicated find()
    queries are factored into one helper, and the figure is closed after
    saving so repeated calls don't accumulate open figures.
    """
    fig, ax = plt.subplots(figsize=(30, 18))

    def convert_ages_to_int(data):
        """Coerce 'agebracket' strings to numbers.

        Plain integers pass through; "a-b" ranges become their mean;
        empty or otherwise unparseable brackets are skipped.
        """
        ages = []
        for h in data:
            try:
                ages.append(int(h['agebracket']))
            except (ValueError, TypeError, KeyError):
                bracket = h['agebracket']
                if bracket == "":
                    pass
                elif len(bracket) > 1 and bracket.find("-") > -1:
                    endpoints = [
                        int(bracket.split("-")[0]),
                        int(bracket.split("-")[1])
                    ]
                    ages.append(mean(endpoints))
        return ages

    def fetch_agebrackets(status):
        """Pull 'agebracket' for every dated row with the given currentstatus."""
        return list(client.covid_db.raw_data.find(
            {'$and': [{'dateannounced': {'$ne': ""}},
                      {'currentstatus': status}]},
            {'agebracket': 1, '_id': 0}))

    client = db_utils.connect()
    hosp_data = convert_ages_to_int(fetch_agebrackets('Hospitalized'))
    reco_data = convert_ages_to_int(fetch_agebrackets('Recovered'))
    deceas_data = convert_ages_to_int(fetch_agebrackets('Deceased'))
    client = None  # drop the connection reference

    # 5-year buckets from 0 to 120.
    age_bucket = list(range(0, 125, 5))

    # hist (not plot/bar) to draw histograms of the three cohorts.
    plt.hist(hosp_data,
             age_bucket,
             label='hospitalized',
             histtype='bar',
             rwidth=0.8)
    plt.hist(reco_data,
             age_bucket,
             label='recovered',
             histtype='bar',
             rwidth=0.8)
    plt.hist(deceas_data, age_bucket, label='dead', histtype='bar', rwidth=0.8)

    plt.xlabel("Age buckets with interval of 5.")
    plt.ylabel("Number of the patients")
    plt.title(
        "Histogram of number of age wise hospitalized, recovred and deceased covid patients."
    )
    plt.legend()
    plt.savefig(img_dir + "Age wise distribution.png",
                facecolor='w',
                edgecolor='w',
                orientation='portrait')
    plt.close(fig)