예제 #1
0
import urllib2
from extraction.Landmark import RuleSet, flattenResult
from learning.PageManager import PageManager
import codecs
import chardet
import shutil
from bs4 import BeautifulSoup
import copy

# routing for API endpoints, generated from the models designated as API_MODELS
from angular_flask.core import api_manager
from angular_flask.models import *

# Hand every model class in the app's API_MODELS config mapping to
# api_manager.create_api, allowing only the GET and POST methods.
for model_name, model_class in app.config['API_MODELS'].items():
    api_manager.create_api(model_class, methods=['GET', 'POST'])

# Module-level alias for the API manager's session object.
session = api_manager.session

def download_url(project_folder, page_url):
    files = next(os.walk(os.path.join(app.static_folder, 'project_folders', project_folder)))[2]
    file_name = 'page_' + str(len(files) + 1) + ".html"


    file_location = os.path.join(app.static_folder, 'project_folders', project_folder, file_name)

    req = urllib2.urlopen(page_url)
    page_contents = req.read()

    # Need to figure out the encoding issues for this!
    # file_location = os.path.join(app.static_folder, 'project_folders', project_folder, file_name)
예제 #2
0
from learning.PageManager import PageManager
from learning.DivListLearner import DivListLearner
import codecs
import chardet
import shutil
from bs4 import BeautifulSoup
import copy

# routing for API endpoints, generated from the models designated as API_MODELS
from angular_flask.core import api_manager
from angular_flask.models import *
from angular_flask.settings import LEARN_LISTS

# Walk the API_MODELS config mapping (name -> model class) and pass each
# model class to api_manager.create_api with GET and POST as the only methods.
for model_name, model_class in app.config['API_MODELS'].items():
    api_manager.create_api(model_class, methods=['GET', 'POST'])

# Keep a module-level reference to the API manager's session.
session = api_manager.session

def download_url(project_folder, page_url):
    files = next(os.walk(os.path.join(app.static_folder, 'project_folders', project_folder)))[2]
    file_name = 'page_' + str(len(files) + 1) + ".html"


    file_location = os.path.join(app.static_folder, 'project_folders', project_folder, file_name)

    req = urllib2.Request(page_url, headers={'User-Agent' : "Magic Browser"}) 
    con = urllib2.urlopen(req)
    page_contents = con.read()

    # Need to figure out the encoding issues for this!