def set_up_docx_inline(url):
    """Download ``url`` and return its body as an in-memory binary stream.

    The raw response bytes are wrapped in a ``BytesIO`` positioned at
    offset 0. The HTTP status code is not inspected, so the body of an
    error response is returned the same way as a successful download.
    """
    from requests import get as requestsget
    from io import BytesIO

    payload = requestsget(url).content
    return BytesIO(payload)
def scrape_fr_google(self, symbol, correct_url):
    """Scrape the latest quote for ``symbol`` from the page at ``correct_url``.

    The first result box of the page is inspected; it is only trusted when
    its text mentions 'Google Finance'. The company name, last trade price
    and last trade time (with the current year appended) are then read
    from that box.

    Args:
        symbol: ticker symbol, copied verbatim into the result row.
        correct_url: URL of the page to fetch and parse.

    Returns:
        A one-row ``DataFrame`` whose columns are ``COL_NAMES``.

    Raises:
        ScrapeFailedException: the response status is not 200, the first
            result box is not a Google Finance box, or an element the
            scraper relies on is missing from the page.
    """

    def pick(node, selector, index=0):
        # A missing element means the page layout is not the expected one;
        # report that as a scrape failure rather than a bare IndexError.
        matches = node.cssselect(selector)
        if len(matches) <= index:
            raise ScrapeFailedException()
        return matches[index]

    print_debug_stmt('scrape_fr_google')
    print_debug_stmt('correct_url')
    print_debug_stmt(correct_url)
    resp = requestsget(correct_url)
    print_debug_stmt('resp.status_code')
    print_debug_stmt(resp.status_code)
    if resp.status_code != 200:
        raise ScrapeFailedException()

    parser = fromstring(resp.text)
    raw_html = pick(parser, '#ires > ol div.g:nth-child(1)')

    # The box must be recognisably about finance before any of its
    # contents are trusted.
    if 'Google Finance' not in raw_html.text_content():
        raise ScrapeFailedException()

    span1st = pick(raw_html, 'h3 span')
    print_debug_stmt('span1st')
    print_debug_stmt(span1st)
    print_debug_stmt('str(span1st.text_content())')
    print_debug_stmt(str(span1st.text_content()))
    company_name = span1st.text_content().lstrip('- ')
    print_debug_stmt('company_name')
    print_debug_stmt(company_name)

    tbl1st = pick(raw_html, 'table')
    # The first <b> in the table holds the price; the second <span> holds
    # the trade time.
    last_trade_price = pick(tbl1st, 'b').text_content()
    last_trade_time = pick(tbl1st, 'span', 1).text_content()
    # The time is extended with the current year.
    last_trade_time = last_trade_time + ' ' + date.today().strftime('%Y')

    # This tuple must match the column headers stored in COL_NAMES, both in
    # position and in meaning, so each value lands in the right column.
    row = (symbol, company_name, 'google', last_trade_time, last_trade_price)
    return DataFrame([row], columns=COL_NAMES)
def attachment_to_email(attachment_res):
    """Download an attachment and wrap it as a base64-encoded MIME part.

    Args:
        attachment_res: mapping with a 'DOWNLOAD_URL' entry (fetched over
            HTTP) and a 'NAME' entry (file name advertised to the mail
            client).

    Returns:
        A ``MIMEBase`` part of type ``application/octet-stream`` carrying
        the downloaded bytes, base64-encoded, with a
        ``Content-Disposition: attachment`` header naming the file.
    """
    from email.encoders import encode_base64
    from email.mime.base import MIMEBase
    from requests import get as requestsget

    content = requestsget(attachment_res['DOWNLOAD_URL']).content

    part = MIMEBase('application', 'octet-stream')
    part.set_payload(content)
    encode_base64(part)
    # Pass the file name as a header parameter so the email package quotes
    # and encodes it; a hand-formatted "filename= name" breaks on names
    # with spaces, semicolons or non-ASCII characters.
    part.add_header('Content-Disposition', 'attachment',
                    filename=attachment_res['NAME'])
    return part
def set_up_MIME_IMAGE(url, file_format):
    """Fetch an image, re-encode it and wrap the result in a ``MIMEImage``.

    The body downloaded from ``url`` is opened with PIL, saved into an
    in-memory buffer using ``file_format`` as the format argument of
    ``Image.save``, and the encoded bytes are passed to ``MIMEImage``.
    """
    from io import BytesIO
    from requests import get as requestsget
    from PIL import Image
    from email.mime.image import MIMEImage

    downloaded = requestsget(url).content
    picture = Image.open(BytesIO(downloaded))

    encoded = BytesIO()
    picture.save(encoded, file_format)
    # getvalue() returns the whole buffer regardless of the stream position.
    return MIMEImage(encoded.getvalue())
def validateExternal(email):
    """Send ``email`` to the configured API URL and report the outcome.

    The request URL is built from the module-level ``apiurl`` template,
    filled in with ``key`` and ``email``.

    Returns:
        A ``(data, error)`` pair. On HTTP 200, ``data`` is the decoded JSON
        body and ``error`` is None. On any other status, ``data`` is None
        and ``error`` is a message that starts with the status code.
    """
    endpoint = apiurl.format(key=key, email=email)
    response = requestsget(endpoint)
    status = response.status_code

    if status != 200:
        error = "{}: Status invalido. Contacte al administrador".format(status)
        return None, error

    return response.json(), None