def set_up_docx_inline(url):
    """Download an image from *url* and return it as an in-memory stream.

    Parameters
    ----------
    url : str
        Direct URL of the image to fetch.

    Returns
    -------
    io.BytesIO
        The raw response body wrapped in a BytesIO, ready to be inlined
        into a docx document.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    from io import BytesIO
    from requests import get as requestsget

    # Time-box the request so a stalled server cannot hang the caller forever.
    response = requestsget(url, timeout=30)
    # Fail loudly on HTTP errors instead of silently wrapping an error page
    # as if it were image data.
    response.raise_for_status()
    return BytesIO(response.content)
# Beispiel #2 (German: "Example #2" -- separator artifact from the snippet
# scrape this file was assembled from; score line "0" preserved below)
# 0
    def scrape_fr_google(self, symbol, correct_url):
        """Scrape a stock quote for *symbol* from a Google results page.

        Fetches *correct_url*, locates the first result box, confirms it is
        the Google Finance answer box, and extracts the company name, last
        trade price and last trade time.

        Parameters:
            symbol: ticker symbol the quote belongs to.
            correct_url: fully-built Google search URL to fetch.

        Returns:
            A single-row DataFrame whose columns are COL_NAMES, holding
            (symbol, company name, 'google', last trade time, last price).

        Raises:
            ScrapeFailedException: if the HTTP request does not return 200
                or the page does not contain a Google Finance box.
        """
        print_debug_stmt('scrape_fr_google')
        print_debug_stmt('correct_url')
        print_debug_stmt(correct_url)

        resp = requestsget(correct_url)
        print_debug_stmt('resp.status_code')
        print_debug_stmt(resp.status_code)

        # Compare the status code as an int -- no string conversion needed.
        if resp.status_code == 200:

            parser = fromstring(resp.text)
            raw_html = parser.cssselect('#ires > ol div.g:nth-child(1)')[0]

            # The first result box must actually be finance-related, i.e.
            # it really is the Google Finance answer box.
            if 'Google Finance' in raw_html.text_content():

                # Company name sits in the first <h3> span, prefixed "- ".
                span1st = raw_html.cssselect('h3 span')[0]
                print_debug_stmt('span1st')
                print_debug_stmt(span1st)
                print_debug_stmt('str(span1st.text_content())')
                print_debug_stmt(str(span1st.text_content()))
                company_name = span1st.text_content().lstrip('- ')

                print_debug_stmt('company_name')
                print_debug_stmt(company_name)

                # Last trade price is the first bold cell of the quote table.
                tbl1st = raw_html.cssselect('table')[0]
                b1st = tbl1st.cssselect('b')[0]
                last_trade_price = b1st.text_content()

                # The trade time on the page lacks a year, so append the
                # current one.
                span2nd = tbl1st.cssselect('span')[1]
                last_trade_time = span2nd.text_content()
                year_atthemoment = date.today().strftime('%Y')
                last_trade_time = last_trade_time + ' ' + str(year_atthemoment)

                # Field order must match, in position and in meaning, the
                # column headers stored in the global constant COL_NAMES --
                # the right data must go to the right column.
                temptupl = (symbol, company_name, 'google', last_trade_time,
                            last_trade_price)
                return DataFrame([temptupl], columns=COL_NAMES)

            else:
                raise ScrapeFailedException()

        else:
            raise ScrapeFailedException()
def attachment_to_email(attachment_res):
    """Download an attachment and wrap it as an email MIME part.

    Parameters
    ----------
    attachment_res : mapping
        Must provide 'DOWNLOAD_URL' (where to fetch the file) and 'NAME'
        (filename advertised in the Content-Disposition header).

    Returns
    -------
    email.mime.base.MIMEBase
        A base64-encoded application/octet-stream part ready to attach.
    """
    from email.mime.base import MIMEBase
    from email.encoders import encode_base64
    from requests import get as requestsget

    # Time-box the download. response.content is already the full payload
    # as bytes -- the previous BytesIO wrap-then-read round-trip was a no-op.
    response = requestsget(attachment_res['DOWNLOAD_URL'], timeout=30)
    p = MIMEBase('application', 'octet-stream')
    p.set_payload(response.content)
    encode_base64(p)
    filename = attachment_res['NAME']
    p.add_header('Content-Disposition', 'attachment; filename= %s' % filename)
    return p
def set_up_MIME_IMAGE(url, file_format):
    """Download an image, re-encode it, and wrap it as a MIMEImage part.

    Parameters
    ----------
    url : str
        Direct URL of the image to fetch.
    file_format : str
        Target format for re-encoding (a PIL format name, e.g. 'PNG').

    Returns
    -------
    email.mime.image.MIMEImage
        The re-encoded image bytes as a MIME image part.
    """
    from io import BytesIO
    from requests import get as requestsget
    from PIL import Image
    from email.mime.image import MIMEImage

    # Time-box the request so a stalled server cannot hang the caller.
    response = requestsget(url, timeout=30)
    # Round-trip through PIL so the payload is guaranteed to be file_format,
    # whatever format the server actually returned.
    img = Image.open(BytesIO(response.content))
    stream_bytes = BytesIO()
    img.save(stream_bytes, file_format)
    # getvalue() replaces the explicit seek(0)/read() dance.
    return MIMEImage(stream_bytes.getvalue())
def validateExternal(email):
    """Validate *email* against the external verification API.

    Returns
    -------
    tuple
        ``(data, error)`` -- on success the decoded JSON body and ``None``;
        on a non-200 response ``None`` and a Spanish error message that
        includes the status code.
    """
    # Consistency fix: every other helper in this file imports requests
    # function-locally; without this line `requestsget` may be undefined
    # at module scope.
    from requests import get as requestsget

    # `apiurl` and `key` are module-level settings defined elsewhere.
    # Time-box the request so a stalled API cannot hang the caller.
    response = requestsget(apiurl.format(key=key, email=email), timeout=30)
    if response.status_code != 200:
        return None, "{}: Status invalido. Contacte al administrador".format(
            response.status_code)
    return response.json(), None