import urllib2
from extraction.Landmark import RuleSet, flattenResult
from learning.PageManager import PageManager
import codecs
import chardet
import shutil
from bs4 import BeautifulSoup
import copy

# routing for API endpoints, generated from the models designated as API_MODELS
from angular_flask.core import api_manager
from angular_flask.models import *

# NOTE(review): `app` and `os` are not imported explicitly in this section;
# presumably they arrive through the star import above -- confirm.

# Call api_manager.create_api once for every model class listed in
# app.config['API_MODELS'], passing GET and POST as the methods.
for model_name, model_class in app.config['API_MODELS'].items():
    api_manager.create_api(model_class, methods=['GET', 'POST'])

session = api_manager.session


def download_url(project_folder, page_url):
    # File names directly inside the project folder (third item of the first
    # tuple yielded by os.walk); their count determines the next page number.
    files = next(os.walk(os.path.join(app.static_folder, 'project_folders', project_folder)))[2]
    file_name = 'page_' + str(len(files) + 1) + ".html"
    file_location = os.path.join(app.static_folder, 'project_folders', project_folder, file_name)

    # Open the URL and read the whole response body.
    req = urllib2.urlopen(page_url)
    page_contents = req.read()
    # Need to figure out the encoding issues for this!
    # file_location = os.path.join(app.static_folder, 'project_folders', project_folder, file_name)
from learning.PageManager import PageManager
from learning.DivListLearner import DivListLearner
import codecs
import chardet
import shutil
from bs4 import BeautifulSoup
import copy

# routing for API endpoints, generated from the models designated as API_MODELS
from angular_flask.core import api_manager
from angular_flask.models import *
from angular_flask.settings import LEARN_LISTS

# NOTE(review): `app`, `os` and `urllib2` are not imported in this section;
# presumably they are brought into scope elsewhere in the file -- confirm.

# Call api_manager.create_api once for every model class listed in
# app.config['API_MODELS'], passing GET and POST as the methods.
for model_name, model_class in app.config['API_MODELS'].items():
    api_manager.create_api(model_class, methods=['GET', 'POST'])

session = api_manager.session


def download_url(project_folder, page_url):
    # File names directly inside the project folder (third item of the first
    # tuple yielded by os.walk); their count determines the next page number.
    files = next(os.walk(os.path.join(app.static_folder, 'project_folders', project_folder)))[2]
    file_name = 'page_' + str(len(files) + 1) + ".html"
    file_location = os.path.join(app.static_folder, 'project_folders', project_folder, file_name)

    # Build the request with an explicit User-Agent header, then open it and
    # read the whole response body.
    req = urllib2.Request(page_url, headers={'User-Agent': "Magic Browser"})
    con = urllib2.urlopen(req)
    page_contents = con.read()
    # Need to figure out the encoding issues for this!