Example #1
0
def source_id(engine):
    """Fetch the ``id`` and ``alias`` columns of the ``dim_sources`` table.

    Args:
        engine: Database handle forwarded unchanged to ``connection.select``.

    Returns:
        Whatever ``connection.select`` returns for that table and column list
        (presumably a DataFrame -- confirm against ``connection.select``).
    """
    columns = ['id', 'alias']
    return connection.select(engine, 'dim_sources', columns)
Example #2
0
def county_id(engine):
    """Fetch the ``id`` and ``name`` columns of the ``dim_county`` table.

    Args:
        engine: Database handle forwarded unchanged to ``connection.select``.

    Returns:
        Whatever ``connection.select`` returns for that table and column list
        (presumably a DataFrame -- confirm against ``connection.select``).
    """
    columns = ['id', 'name']
    return connection.select(engine, 'dim_county', columns)
Example #3
0
import numpy as np
import pandas as pd
import connection as conn
import matplotlib.pyplot as plt

# Load every row of the vw_cases view. The disconnect runs in a ``finally`` so
# the connection is released even when the query itself raises.
cnxn, crsr, engine = conn.connect()
try:
    df = conn.select(engine, 'vw_cases')
finally:
    conn.disconnect(cnxn, crsr, engine)

# Keep only the rows whose ``name`` column is 'Dallas'.
df = df[df['name'] == 'Dallas']

# One x value per day elapsed between the earliest and latest report date.
delta = df['report_date'].max() - df['report_date'].min()
start = 1
x_data = np.arange(start, delta.days + 1)
# NOTE(review): this assumes the rows are ordered by report_date and that there
# is at least one row per day; otherwise y_data will not line up with x_data
# and np.polyfit will raise on the length mismatch -- confirm against the view.
y_data = df['cases'].tail(delta.days - start + 1).to_numpy()

# Fit a straight line to log(y): log(y) = slope * x + intercept, which is
# equivalent to the exponential model y = exp(intercept) * exp(slope * x).
# NOTE(review): a zero in ``cases`` makes np.log return -inf and breaks the fit.
log_y_data = np.log(y_data)

curve_fit = np.polyfit(x_data, log_y_data, 1)
print(curve_fit)

# curve_fit[0] is the slope and curve_fit[1] the intercept (highest power first).
y = np.exp(curve_fit[1]) * np.exp(curve_fit[0] * x_data)
plt.plot(x_data, y_data, 'o')
plt.plot(x_data, y)
plt.show()
Example #4
0
def _readSelAttr(node, sel):
    """Read an attribute from ``node`` as described by the sequence ``sel``.

    When ``sel`` has a single element it names an attribute on ``node``
    itself. Otherwise ``sel[0]`` is passed to ``node.select`` and attribute
    ``sel[1]`` is read from the first match.

    Raises:
        KeyError: If the attribute is missing.
        IndexError: If ``node.select`` finds no match.
    """
    if len(sel) < 2:
        return node.attrs[sel[0]]
    return node.select(sel[0])[0].attrs[sel[1]]


def getArticleData(articles_pkg):
    """Visit each article URL and collect the content entries found on it.

    Args:
        articles_pkg: Mapping that supplies the keys ``'site'``,
            ``'contents_selector'``, ``'articles'``, ``'content_hl'``,
            ``'content_img'``, ``'content_link'`` and ``'farm'``.

    Returns:
        A list of dicts with the keys ``headline``, ``link``, ``img``,
        ``provider``, ``source``, ``orig_article`` and ``from_ip``. The list
        is empty when nothing could be extracted; that case is logged as an
        error.
    """
    source = articles_pkg['site']

    logging.info("Getting Article Content for {}".format(source))

    contents = articles_pkg['contents_selector']
    articles = articles_pkg['articles']
    hlSel = _defineSel(articles_pkg['content_hl'])
    imgSel = _defineSel(articles_pkg['content_img'])
    linkSel = _defineSel(articles_pkg['content_link'])
    provider = articles_pkg['farm']
    article_host = '{}_article'.format(urlparse(source).netloc)

    output = []
    contentDriver = SessionManager(host=article_host,
                                   chrome=_CHECKCHROME,
                                   chrome_path=_CHROME_PATH,
                                   chromedriver_path=_CHROMEDRIVER_PATH)

    for article in articles:
        try:
            contentDriver.driver.get(article)
            # Fixed pause after navigation before the page is parsed.
            time.sleep(15)
        except Exception as e:
            logging.error("Problem getting: {} - {}. Moving on".format(
                article, e))
            continue

        soup = contentDriver.requestParsed()
        content_soup = soup.select(contents)
        if not content_soup:
            logging.warning(
                "content soup was empty for {} - {}. Saving a screenshot".
                format(source, article))
            contentDriver.screenshot(source)
            continue

        # The try deliberately wraps the whole loop: the first entry that
        # fails to parse abandons the remaining entries of this article,
        # while entries already appended to ``output`` are kept.
        try:
            for c in content_soup:
                # isinstance (rather than an exact type comparison) also
                # accepts list subclasses returned by _defineSel.
                if isinstance(hlSel, list):
                    hl = _readSelAttr(c, hlSel)
                else:
                    hl = c.select(hlSel)[0].text
                ln = _readSelAttr(c, linkSel)
                img = _readSelAttr(c, imgSel)

                if 'background' in img:
                    # HACK: extract revcontent img urls -- take the text
                    # between the parentheses and read its ``url`` query
                    # parameter.
                    wrapped = img[img.find("(") + 1:img.find(")")]
                    img = parse_qs(urlparse(wrapped).query)['url'][0]
                if 'trends.revcontent' in ln:
                    ln = _getFullURL(ln)

                output.append({
                    'headline': hl,
                    'link': ln,
                    'img': img,
                    "provider": provider,
                    "source": source,
                    "orig_article": article,
                    "from_ip": _IPADDR
                })
        except Exception as e:
            logging.warning(
                "Could not get contents of these native ads on {0} - {1}: {2}"
                .format(source, article, e))

    if not output:
        logging.error('Recieved no content from {}'.format(article_host))
    return output
Example #5
0
def lista_alunos():
    """Return the result of ``select('*', 'alunos')`` passed through ``jsonify``."""
    return jsonify(select('*', 'alunos'))