Example #1
from jinja2 import Environment, FileSystemLoader


def run(md_file, name, email, no_contents, template_dir):

    title, summary, body = get_content.get_content(md_file)
    
    r = False  # flags an R Markdown source for the template
    # Remove the title from the body so it is not rendered twice
    filename = body.replace(title, '')

    if md_file[-3:].lower() == 'rmd':
        warning('RMD file detected. Processing...')
        r = True
        filename = process_r_markdown(md_file)

    # autoescape should really be True, but enabling it keeps the HTML
    # from rendering, so it stays off here
    env = Environment(loader=FileSystemLoader(template_dir), autoescape=False)
    template = env.get_template('report_template.html')

    output_from_parsed_template = template.render(content=get_html.render_markdown_content(filename),
                                                  title=title,
                                                  summary=summary,
                                                  author_name=name,
                                                  author_email=email,
                                                  no_contents=no_contents,
                                                  r=r
                                                  )

    return output_from_parsed_template
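
The autoescape remark in Example #1 points at a common Jinja2 pitfall: with autoescape=True, already-rendered HTML gets escaped into visible tags instead of markup. A minimal sketch of the usual fix, wrapping trusted HTML in markupsafe.Markup so autoescape can stay on; the template name is taken from the example, the rest is illustrative:

from jinja2 import Environment, FileSystemLoader
from markupsafe import Markup

env = Environment(loader=FileSystemLoader('templates'), autoescape=True)
template = env.get_template('report_template.html')

# Hypothetical HTML produced by a markdown renderer we trust
trusted_html = '<h1>Title</h1><p>Body</p>'

# Markup flags the string as safe, so {{ content }} renders as HTML
# while every other template variable is still escaped
page = template.render(content=Markup(trusted_html), title='Report <demo>')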
Example #2
def crawl_tamsu(
        num=randint(6621, 12128), path='data/tamsu/', header_name='tamsu_'):
    # NOTE: this randint default is evaluated once, at definition time,
    # so every call without an explicit num reuses the same value
    if not os.path.exists(path):
        os.makedirs(path)
    urls = get_urls_new(VNEXPRESS_TAMSU, num)
    for url in urls:
        new = get_content(url)
        if new != '':
            print("Write url: {}".format(url))
            save_content(path, header_name, new)
Example #3
def crawl_thegioi(
        num=randint(6621, 12128), path='data/thegioi/',
        header_name='thegioi_'):
    urls = get_urls_new(VNEXPRESS_THEGIOI, num)
    if not os.path.exists(path):
        os.makedirs(path)
    for url in urls:
        new = get_content(url)
        if new != '':
            print("Write url: {}".format(url))
            save_content(path, header_name, new)
Example #4
def crawl_giaoduc(
        num=randint(6621, 12128), path='data/giaoduc/',
        header_name='giaoduc_'):
    if not os.path.exists(path):
        os.makedirs(path)
    urls = get_urls_new(VNEXPRESS_GIAODUC, num)
    for url in urls:
        new = get_content(url)
        if new != '':
            print("Write url: {}".format(url))
            save_content(path, header_name, new)
Example #5
def crawl_phapluat(
        num=randint(6621, 12128), path='data/phapluat/',
        header_name='phapluat_'):
    if not os.path.exists(path):
        os.makedirs(path)
    urls = get_urls_new(VNEXPRESS_PHAPLUAT, num)
    for url in urls:
        new = get_content(url)
        if new != '':
            print("Write url: {}".format(url))
            save_content(path, header_name, new)
Example #6
def crawl_giaitri(
        num=randint(6621, 12128), path='data/giaitri/',
        header_name='giaitri_'):
    import os  # better hoisted to module level, as the sibling crawlers assume
    if not os.path.exists(path):
        os.makedirs(path)
    urls = get_urls_new(VNEXPRESS_GIAITRI, num)
    for url in urls:
        new = get_content(url)
        if new != '':
            print("Write url: {}".format(url))
            save_content(path, header_name, new)
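
Examples #2 through #6 repeat the same routine with a different category constant, output directory, and filename prefix. A consolidation sketch under the helpers assumed by those examples (get_urls_new, get_content, save_content, and the VNEXPRESS_* constants); it also draws the random article count per call instead of once at definition time:

import os
from random import randint

def crawl_category(category_url, path, header_name, num=None):
    # A randint default argument is evaluated only once, when the
    # function is defined, so draw the count inside the body instead
    if num is None:
        num = randint(6621, 12128)
    os.makedirs(path, exist_ok=True)
    for url in get_urls_new(category_url, num):
        new = get_content(url)
        if new != '':
            print("Write url: {}".format(url))
            save_content(path, header_name, new)

# e.g. crawl_category(VNEXPRESS_TAMSU, 'data/tamsu/', 'tamsu_')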
Example #7
def main():
    print("""
    Welcome, you are using the ContentGetter. Though ContentGetter
    uses Python Cryptography to encrypt your saved information, please
    do not use it to store important passwords on sites such as Bank 
    , Amazon, Facebook, or Instagram where Hackers could be a potential
    threat.

    It's fine to use it to store information with your CV, if you are
    copying and pasting all the time to fill out only application, this
    could make your life easier!
    """)

    print("Press Y/y to continue; press any key to quit")
    user_input = input("Are you going to use the ContentGetter?> ")
    if user_input.lower() == 'y':
        # continue
        path = get_path()
        file_name = get_file_name()
        while True:
            user_want = input("Would you like to (add), (get), or (del) content? (any other key quits)> ")
            if user_want.lower() == "add":
                # add account and its content to the file
                account = get_account()
                content = enter_content()
                content_key_writer(path, file_name, account, content)

            elif user_want.lower() == "get":
                # get the content from the account in the file
                account = get_account()
                get_content(path, file_name, account)
            elif user_want.lower() == 'del':
                # delete the account from file
                account = get_account()
                del_account(path, file_name, account)
            else:
                break

    else:
        print("Thank you for using ContentGetter.")
Example #8
def get_table_droppingodds():
    referer = "https://www.google.com"
    url = "https://www.arbworld.net/en/droppingodds"
    r = get_content.get_content(url, referer)
    soup = BeautifulSoup(r.text, 'lxml')
    events_file = filein2  # filein2 and drop are module-level settings in the source

    table1 = soup.find("table", {'class': 'grid'})
    header_cof = []
    # select() takes a CSS selector; the attrs-dict form belongs to find_all()
    for cof_list1 in table1.select("tr.heading"):
        for cof1 in cof_list1.select("td"):
            header_cof.append(cof1.get_text(separator=' '))

    with open(events_file, 'w') as csvfile:
        writercol = csv.DictWriter(csvfile, fieldnames=header_cof[3:14])
        writercol.writeheader()

    table = soup.find('table', {'id': 'matches'})

    for cof_list in table.select("tr.belowHeader"):
        cof_list_list = []
        for cof in cof_list.select("td"):
            cof_list_list.append(cof.get_text(separator=' '))

        if len(cof_list_list[2:13]) == 0:
            continue

        # Cells 6, 8 and 10 each contain two numbers; a row is kept when
        # the first exceeds the second by at least `drop`
        if float(cof_list_list[6].split()[0]) - float(cof_list_list[6].split()[1]) >= drop:
            with open(events_file, 'a') as csvfile:
                writerrow = csv.writer(csvfile)
                row = (cof_list_list[2], cof_list_list[3], cof_list_list[4],
                       cof_list_list[5], cof_list_list[6], "", "", "", "",
                       cof_list_list[11], cof_list_list[12])
                writerrow.writerow(row)

        if float(cof_list_list[8].split()[0]) - float(cof_list_list[8].split()[1]) >= drop:
            with open(events_file, 'a') as csvfile:
                writerrow = csv.writer(csvfile)
                row = (cof_list_list[2], cof_list_list[3], cof_list_list[4],
                       cof_list_list[5], "", "", cof_list_list[8], "", "",
                       cof_list_list[11], cof_list_list[12])
                writerrow.writerow(row)

        if float(cof_list_list[10].split()[0]) - float(cof_list_list[10].split()[1]) >= drop:
            with open(events_file, 'a') as csvfile:
                writerrow = csv.writer(csvfile)
                row = (cof_list_list[2], cof_list_list[3], cof_list_list[4],
                       cof_list_list[5], "", "", "", "", cof_list_list[10],
                       cof_list_list[11], cof_list_list[12])
                writerrow.writerow(row)
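
Examples #8 and #9 reopen the CSV file in append mode for every matching row. A sketch of the usual pattern, opening the file once and streaming rows into a single writer (table, header_cof and drop as in the example above; newline='' avoids blank lines on Windows):

with open(events_file, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header_cof[3:14])  # header row
    for tr in table.select("tr.belowHeader"):
        cells = [td.get_text(separator=' ') for td in tr.select("td")]
        if not cells[2:13]:
            continue
        parts = cells[6].split()
        if float(parts[0]) - float(parts[1]) >= drop:
            writer.writerow(cells[2:13])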
Example #9
def get_table_1x2():
    referer = "https://www.google.com"
    url = "https://www.arbworld.net/en/moneyway/mw-1-x-2"
    r = get_content.get_content(url, referer)
    soup = BeautifulSoup(r.text, 'lxml')
    events_file = filein1

    table1 = soup.find("table", {'class': 'grid'})
    header_cof = []
    # As above: select() wants a CSS selector, not a find_all-style attrs dict
    for cof_list1 in table1.select("tr.heading"):
        for cof1 in cof_list1.select("td"):
            header_cof.append(cof1.get_text(separator=' '))

    with open(events_file, 'w') as csvfile:
        writercol = csv.DictWriter(csvfile, fieldnames=header_cof[3:14])
        writercol.writeheader()

    table = soup.find('table', {'id': 'matches'})

    for cof_list in table.select("tr.belowHeader"):
        cof_list_list = []
        for cof in cof_list.select("td"):
            cof_list_list.append(cof.get_text(separator=' '))

        if len(cof_list_list[2:13]) == 0:
            continue

        if int(cof_list_list[12].replace('€', '').replace(' ', '')) > vol:
            if float(cof_list_list[9].replace('€', '').replace('%', '').split()[0]) > percent:
                if float(cof_list_list[6]) > cof_cof:
                    with open(events_file, 'a') as csvfile:
                        writerrow = csv.writer(csvfile)
                        writerrow.writerow(cof_list_list[2:13])
            if float(cof_list_list[10].replace('€', '').replace('%', '').split()[0]) > percent:
                if float(cof_list_list[7]) > cof_cof:
                    with open(events_file, 'a') as csvfile:
                        writerrow = csv.writer(csvfile)
                        writerrow.writerow(cof_list_list[2:13])
            if float(cof_list_list[11].replace('€', '').replace('%', '').split()[0]) > percent:
                if float(cof_list_list[8]) > cof_cof:
                    with open(events_file, 'a') as csvfile:
                        writerrow = csv.writer(csvfile)
                        writerrow.writerow(cof_list_list[2:13])
Example #10
def amin(start_url='http://pstu.ru'):
    rezult = []  # not defined in the original snippet; assumed to be a local list
    content = get_content(start_url)
    tokens = get_tokens(content)
    # Debug leftovers; the exit(0) made everything below dead code
    # print(3333, tokens)
    # exit(0)
    for token in tokens:
        if is_xlsx_link(token):
            xlsx_content = xlsx_get_content(link_from_token(token))
            xlsx_tokens = tokenize_xlsx(xlsx_content)
            for cell in xlsx_content:
                time, lecture, teacher, room = parse(cell)
                # list.append() takes a single argument, so pack the fields into a tuple
                rezult.append((link_from_token(token), xlsx_tokens, time,
                               lecture, teacher, room))
        elif is_normal_link(link_from_token(token)):
            print(2, token)
            # amin(link_from_token(token))
    #rezult.append(start_url, tokens)
    rezult.append(start_url)
    rezult.append(tokens)
    return rezult
Example #11
from flask import Flask, render_template, request
import get_content
import url_renderer
import extract_content
import datetime
import extract_form

content = get_content.get_content(url_renderer.get_current_url())  # fetched once, at import time

app = Flask(__name__)


@app.route('/')
def presidency():
    start_time = datetime.datetime(2000, 1, 1, 19)

    header = extract_content.header(content)
    sections = [extract_content.section1(content), extract_content.section2(content),
                extract_content.section3(content), extract_content.section4(content)]

    for section in sections:
        for item in section.items:
            item.time = start_time.time()
            start_time += datetime.timedelta(minutes=item.duration)

    return render_template("sections.html", sections=sections, header=header, start_time=start_time)


@app.route('/', methods=['POST'])
def form_post():
    tree, section_title = extract_form.create_tree(request.form)
    # The source snippet ends here; a Flask view must return a response,
    # so an assumed minimal completion re-renders the page:
    return presidency()
Example #12
import get_content
from bs4 import BeautifulSoup
import csv


vol = 700
percent = 90
cof_cof = 1.6

referer = "https://www.google.com"
url = "https://www.arbworld.net/en/moneyway/mw-overunder"
r = get_content.get_content(url, referer)
soup = BeautifulSoup(r.text, 'lxml')

events_file = "arb_over_under" + ".csv"


table1 = soup.find("table", {'class': 'grid'})
header_cof = []
# select() takes a CSS selector; the attrs-dict form belongs to find_all()
for cof_list1 in table1.select("tr.heading"):
    for cof1 in cof_list1.select("td"):
        header_cof.append(cof1.get_text(separator=' '))

with open(events_file, 'w') as csvfile:
    writercol = csv.DictWriter(csvfile, fieldnames=header_cof[3:13])
    writercol.writeheader()

table = soup.find('table', {'id': 'matches'})

for cof_list in table.select("tr.belowHeader"):
    cof_list_list = []
    # The snippet is cut off here in the source; its sibling examples
    # continue by collecting the row's cells:
    for cof in cof_list.select("td"):
        cof_list_list.append(cof.get_text(separator=' '))
Example #13
import get_content
from bs4 import BeautifulSoup
import json

url = "https://www.parimatch.com"
r = get_content.get_content(url, url).content
data = {}
soup = BeautifulSoup(r, 'lxml')
menu = soup.find('div', {'id': 'lobbyLeftHolder'})
list_group_of_sports = menu.find('div', {'id': 'lobbySportsHolder'})

for list_group in list_group_of_sports.find_all('ul', {'class': 'groups'}):
    group_name = list_group.find_previous('a').get_text()
    data[group_name] = []
    for list_group_events in list_group.find_all('li'):
        for item in list_group_events.select("a"):
            name_group_events = item.get_text()
            id_group_events = item['hd']
            href_group_events = item['href']
            data[group_name].append({
                'name': name_group_events,
                'id': id_group_events,
                'href': href_group_events
            })

with open('group_events_list.json', mode='w', encoding='UTF-8') as wfile:
    json.dump(data, wfile, indent=4, ensure_ascii=False)
Example #14
# -*- coding: utf-8 -*-
import re

import jieba.analyse

import get_content

for content, active_id in get_content.get_content():
    sentence = re.sub("<.*?>", "", content)
    ret = jieba.analyse.extract_tags(sentence,
                                     topK=20,
                                     withWeight=True,
                                     allowPOS=())
    active_id = active_id.split("/")[-1]
    key_words = ""
    for k, v in ret:
        if v > 0.05:
            key_words = key_words + k + ","
    print(key_words, active_id)
    break  # sample run: only the first content item is processed
Example #15
from get_links import get_links
from get_content import get_html, get_date, get_content, save
from tqdm import tqdm

for i in tqdm(range(1, 570)):
    url = f'https://vnexpress.net/kinh-doanh/chung-khoan-p{i}'
    try:
        links = get_links(url)
        for link in links:
            try:
                soup = get_html(link)
                content = get_content(soup)
                date, year = get_date(soup)
                save(
                    link.replace("https://vnexpress.net/",
                                 "").replace(".html", ".txt"), content, year,
                    date, "vnexpress-data")
            except Exception:
                print("error:", link)
    except Exception:
        print("error:", url)
Example #16
    def find_field(self):
        '''
        Finding out which unit field is going to be patched inside EC scope.
        '''
        if VERBOSE:
            print(
                "Into: find_field(): Finding out which unit field is going to be patched inside EC scope."
            )
        for OR in self.OR_info:
            OR["field_unit"] = []
            OR_path = OR["path"] + '.' + OR["name"]
            content = get_content.get_content(self.dsdt_splited, OR_path)
            for field in content.split("}")[:-1]:
                field = field.split('{')[1]  # Remove field header
                store_flag = False  # Does this field contain a unit larger than 8 bits?
                field_content_splln = field.split('\n')
                offset_bits = 0  # offset in bits
                name = ''
                size = 0
                for item in field_content_splln:
                    if ',' not in item:
                        # Skip empty line
                        continue
                    elif "Offset" not in item:
                        item_spl = item.split(',')
                        name = item_spl[0].strip()
                        size = int(item_spl[1].strip())
                        if size > 8 and name != '':
                            if offset_bits % 8 != 0:
                                print(FIELD_UNIT_OFFSET_ERR)
                                exit(2)
                            OR["field_unit"].append({
                                "name":
                                name,
                                "offset":
                                int(offset_bits / 8),
                                "size":
                                size,
                                "OR_path":
                                OR_path
                            })
                            store_flag = True
                        offset_bits += size
                    else:
                        item = item.strip()
                        offset = re.search(r'Offset \((.*)\)', item).group(1)
                        offset_bits = int(offset, 16) * 8

                if store_flag:
                    # Store this OperationRegion and its units
                    if OR["storage"] not in self.RW_method:
                        while True:
                            # This will generate a new R/W method name
                            letter = random.choice(
                                '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')
                            OR['RE1B'] = 'R1B' + letter
                            OR['RECB'] = 'REB' + letter
                            OR['ERM2'] = 'MEM' + letter
                            OR['WE1B'] = 'W1B' + letter
                            OR['WECB'] = 'WRB' + letter
                            if (self.dsdt_content.find(OR['RE1B']) == -1 and OR['RE1B'] not in self.RW_method) \
                                    and (self.dsdt_content.find(OR['RECB']) == -1 and OR['RECB'] not in self.RW_method) \
                                    and (self.dsdt_content.find(OR['ERM2']) == -1 and OR['ERM2'] not in self.RW_method) \
                                    and (self.dsdt_content.find(OR['WE1B']) == -1 and OR['WE1B'] not in self.RW_method) \
                                    and (self.dsdt_content.find(OR['WECB']) == -1 and OR['WECB'] not in self.RW_method):
                                # Loop until none of the generated names collides with an existing one
                                break

                        # Add the content of R/W method to self.RW_method
                        self.RW_method += RW_METHOD[0] + \
                            OR["path"] + RW_METHOD[1] +\
                            OR['RE1B'] + RW_METHOD[2] + \
                            OR['ERM2'] + RW_METHOD[3] + \
                            OR["storage"] + RW_METHOD[4] + \
                            OR['ERM2'] + RW_METHOD[5] + \
                            OR['RECB'] + RW_METHOD[6] + \
                            OR['RE1B'] + RW_METHOD[7] + \
                            OR['WE1B'] + RW_METHOD[8] + \
                            OR['ERM2'] + RW_METHOD[9] + \
                            OR["storage"] + RW_METHOD[10] + \
                            OR['ERM2'] + RW_METHOD[11] + \
                            OR['WECB'] + RW_METHOD[12] + \
                            OR['WE1B'] + RW_METHOD[13]
                    else:
                        # If OR['storage'] in self.RW_method and not in OR itself
                        # Then copy the read/write method from other OR which has the same storage
                        for _OR_ in self.OR_info:
                            if _OR_['storage'] == OR['storage']:
                                OR['RE1B'] = _OR_['RE1B']
                                OR['RECB'] = _OR_['RECB']
                                OR['ERM2'] = _OR_['ERM2']
                                OR['WE1B'] = _OR_['WE1B']
                                OR['WECB'] = _OR_['WECB']
                                break

        # Filter instead of remove()-ing while iterating, which skips entries
        self.OR_info = [OR for OR in self.OR_info if len(OR['field_unit']) >= 1]

        if "RECB" not in self.RW_method:
            print(NOT_NEED_TO_PATCH_MSG)
            exit(0)

        if VERBOSE:
            for OR in self.OR_info:
                print(OR['path'] + '.' + OR['name'] + ', ' + OR['storage'])
                for unit in OR['field_unit']:
                    print('  -', unit)
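
The offset bookkeeping in find_field is the subtle part: unit sizes are declared in bits, Offset(...) entries jump to an absolute byte position, and only byte-aligned units wider than 8 bits are collected. A small self-contained sketch of that arithmetic on a made-up field body; the input string and function name are illustrative, not taken from the tool:

import re

def collect_units(field_body):
    # field_body: the text between '{' and '}' of an ACPI Field() block
    units, offset_bits = [], 0
    for item in field_body.split('\n'):
        if ',' not in item:
            continue  # skip empty lines
        if 'Offset' in item:
            # Offset (0x10) jumps to byte 0x10, i.e. bit 0x10 * 8
            offset_bits = int(re.search(r'Offset \((.*)\)', item).group(1), 16) * 8
        else:
            parts = item.split(',')
            name, size = parts[0].strip(), int(parts[1].strip())
            if size > 8 and name and offset_bits % 8 == 0:
                units.append({'name': name, 'offset': offset_bits // 8, 'size': size})
            offset_bits += size
    return units

print(collect_units("Offset (0x10),\nFLD0, 8,\nFLD1, 16,"))
# -> [{'name': 'FLD1', 'offset': 17, 'size': 16}]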
Example #17
import get_content
import init

init.init()

with open(init.DEFAULT_URLS_FILE) as myfile:
    for url in myfile:
        get_content.get_content(url.strip())  # drop the trailing newline

with open(init.DEFAULT_HTML_FILE, 'a') as f:
    f.write("\n</html>")
Example #18
from get_content import get_content
from send_mail import send_mail

if __name__ == '__main__':
    data = get_content()
    send_mail('*****@*****.**', 'title', data)
Example #19
from get_urls import get_urls
from get_content import get_content
from get_jsons import get_jsons
from get_comments import getComments, write_comments

if __name__ == "__main__":
    commentsData = []
    urls = get_urls()
    for i, url in enumerate(urls):
        print('Getting:', url)
        # get_response(str(i + 1), url)
        html_content = get_content(url)
        json_data = get_jsons(html_content)
        comments = getComments(json_data)
        for comment in comments:
            comment.insert(0, 'No.' + str(i) + ' url')
            commentsData.append(comment)

    write_comments(commentsData)