import connection


def source_id(engine):
    # Return the id/alias mapping from the dim_sources dimension table.
    df = connection.select(engine, 'dim_sources', ['id', 'alias'])
    return df


def county_id(engine):
    # Return the id/name mapping from the dim_county dimension table.
    df = connection.select(engine, 'dim_county', ['id', 'name'])
    return df
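# Usage sketch (hypothetical; assumes conn.connect() returns the same
# (cnxn, crsr, engine) triple used in the curve-fitting script below):
import connection as conn

cnxn, crsr, engine = conn.connect()
counties = county_id(engine)
# Resolve a county name to its surrogate key before joining fact tables.
dallas_id = counties.loc[counties['name'] == 'Dallas', 'id'].iloc[0]
conn.disconnect(cnxn, crsr, engine)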
import numpy as np
import connection as conn
import matplotlib.pyplot as plt

# Pull the case view, then release the connection immediately.
cnxn, crsr, engine = conn.connect()
df = conn.select(engine, 'vw_cases')
conn.disconnect(cnxn, crsr, engine)

# Restrict to Dallas county; sort so tail() takes the most recent days.
df = df[df['name'] == 'Dallas']
df = df.sort_values('report_date')
delta = df['report_date'].max() - df['report_date'].min()
start = 1
x_data = np.arange(start, delta.days + 1)
y_data = df['cases'].tail(delta.days - start + 1).to_numpy()

# Fit y = a * exp(b * x) by regressing log(y) on x; polyfit returns
# [slope, intercept], i.e. [b, log(a)].
log_y_data = np.log(y_data)
curve_fit = np.polyfit(x_data, log_y_data, 1)
print(curve_fit)
y = np.exp(curve_fit[1]) * np.exp(curve_fit[0] * x_data)

# Plot observed daily cases against the fitted exponential curve.
plt.plot(x_data, y_data, 'o')
plt.plot(x_data, y)
plt.show()
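# Self-contained sketch of the same log-linear exponential fit on
# synthetic data (illustrative only; no database needed). Regressing
# log(y) on x recovers b and log(a) for y = a * exp(b * x).
import numpy as np

x = np.arange(1, 31)
y = 2.0 * np.exp(0.3 * x)
b, log_a = np.polyfit(x, np.log(y), 1)
print(np.exp(log_a), b)  # approximately 2.0 and 0.3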
def getArticleData(articles_pkg):
    source = articles_pkg['site']
    logging.info("Getting Article Content for {}".format(source))
    contents = articles_pkg['contents_selector']
    articles = articles_pkg['articles']
    hlSel = _defineSel(articles_pkg['content_hl'])
    imgSel = _defineSel(articles_pkg['content_img'])
    linkSel = _defineSel(articles_pkg['content_link'])
    provider = articles_pkg['farm']
    article_host = '{}_article'.format(urlparse(source).netloc)
    output = []
    contentDriver = SessionManager(host=article_host,
                                   chrome=_CHECKCHROME,
                                   chrome_path=_CHROME_PATH,
                                   chromedriver_path=_CHROMEDRIVER_PATH)
    for article in articles:
        try:
            # contentDriver.driver.implicitly_wait(15)
            contentDriver.driver.get(article)
            time.sleep(15)
        except Exception as e:
            logging.error("Problem getting: {} - {}. Moving on".format(
                article, e))
            continue
        soup = contentDriver.requestParsed()
        content_soup = soup.select(contents)
        if content_soup:
            try:
                for c in content_soup:
                    # A list selector means "read this attribute" (optionally
                    # after a nested select); a string means "take the text".
                    if isinstance(hlSel, list):
                        hl = c.attrs[hlSel[0]] if len(hlSel) < 2 else c.select(
                            hlSel[0])[0].attrs[hlSel[1]]
                    else:
                        hl = c.select(hlSel)[0].text
                    ln = c.attrs[linkSel[0]] if len(linkSel) < 2 else c.select(
                        linkSel[0])[0].attrs[linkSel[1]]
                    img = c.attrs[imgSel[0]] if len(imgSel) < 2 else c.select(
                        imgSel[0])[0].attrs[imgSel[1]]
                    if 'background' in img:
                        # Hack to extract revcontent img URLs from a CSS
                        # background-image declaration.
                        img = parse_qs(
                            urlparse(img[img.find("(") + 1:img.find(")")]).query
                        )['url'][0]
                    if 'trends.revcontent' in ln:
                        ln = _getFullURL(ln)
                    output.append({
                        'headline': hl,
                        'link': ln,
                        'img': img,
                        'provider': provider,
                        'source': source,
                        'orig_article': article,
                        'from_ip': _IPADDR
                    })
            except Exception as e:
                logging.warning(
                    "Could not get contents of these native ads on {0} - {1}: {2}"
                    .format(source, article, e))
        else:
            logging.warning(
                "content soup was empty for {} - {}. Saving a screenshot".
                format(source, article))
            contentDriver.screenshot(source)
            continue
    if not output:
        logging.error('Received no content from {}'.format(article_host))
    return output
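# Hypothetical articles_pkg showing the shape getArticleData() expects,
# inferred from the key lookups above. The selector values are purely
# illustrative, and the exact string-vs-attribute encoding depends on
# what _defineSel() parses.
example_pkg = {
    'site': 'https://example.com',
    'farm': 'examplefarm',
    'contents_selector': 'div.native-ad',
    'content_hl': 'h3.headline',   # _defineSel() turns these into the
    'content_img': 'img',          # list/str selectors used above
    'content_link': 'a',
    'articles': ['https://example.com/story-1'],
}
# rows = getArticleData(example_pkg)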
from flask import jsonify


def lista_alunos():
    # Return every row of the alunos (students) table as JSON.
    alunos = select('*', 'alunos')
    return jsonify(alunos)
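# Usage sketch (assumptions: the Flask app object and the '/alunos' URL
# are hypothetical, and `select` must return JSON-serializable rows):
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/alunos', view_func=lista_alunos)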