コード例 #1
0
def add_page_pair_to_database(from_page, to_page, limit):
    """Record a link from *from_page* to *to_page*, creating missing pages.

    The source page is always created when absent.  The destination page is
    only created while the total page count is still below *limit* (a limit
    below 1 disables the cap); otherwise the relation is silently dropped.
    """
    with db_lock:
        source_id = session.query(Page.id).filter(Page.url == from_page).scalar()
        dest_id = session.query(Page.id).filter(Page.url == to_page).scalar()

        if source_id is None:
            source_page = Page(url=from_page, text="", rank=0)
            session.add(source_page)
            session.flush()  # populate the autogenerated id
            source_id = source_page.id

        if dest_id is None:
            # limit < 1 means "no cap"; otherwise only grow below the cap.
            may_create = limit < 1 or session.query(Page).count() < limit
            if not may_create:
                return
            dest_page = Page(url=to_page, text="", rank=0)
            session.add(dest_page)
            session.flush()
            dest_id = dest_page.id

        session.add(Relation(page_id=source_id, destination_id=dest_id))
        session.commit()
コード例 #2
0
def load_pages():
    """Load pages from seed data into the database.

    Reads pipe-delimited rows (page_id|user_id|page|hidden) from
    seed_data/pages.txt.  Empty string fields are omitted so the column
    defaults apply.  All rows are committed in a single transaction.
    """
    with open("seed_data/pages.txt") as pages:
        for row in pages:
            fields = row.rstrip().split("|")

            # Keep only non-empty text fields; a two-pass delete loop is
            # unnecessary — build the filtered dict directly.
            kwargs = {
                key: value
                for key, value in (
                    ("page_id", fields[0]),
                    ("user_id", fields[1]),
                    ("page", fields[2]),
                )
                if value != ""
            }
            # fields[3] holds the literal text "True"/"False".
            kwargs["hidden"] = fields[3] == "True"

            db.session.add(Page(**kwargs))

    db.session.commit()
コード例 #3
0
    def process_list(self, page):
        """Extract sub-page URLs from a parsed list page and queue the new ones.

        Each URL not yet in the database is stored as a new Page and added to
        the application queue; relative URLs are prefixed with the configured
        scrape root.  Finally the list page is marked PROCESSED.
        """
        queue = self.application.queue
        logging.debug('Processing list %s' % page.url)
        processed = 0
        # Guard clause: anything not freshly parsed was handled already.
        if page.state != Page.State.PARSED:
            logging.debug('Already processed list %s' % page.url)
            return
        with session_scope(self.application.Session) as session:
            for url in page.get_contents():
                (page_exists, ), = session.query(
                    exists().where(Page.url == url))
                if not page_exists:
                    # Bug fix: the old `'http' not in url` substring test
                    # wrongly treated any relative path containing "http"
                    # as absolute; check the prefix instead.
                    if not url.startswith('http'):
                        url = self.application.config.SCRAPE_ROOT_PATH + url
                    subpage = Page(url=url)

                    session.add(subpage)
                    session.commit()  # assign subpage.page_id before queueing
                    queue.add_page(subpage.page_id)
                    processed += 1
            page.state = Page.State.PROCESSED
        logging.debug('Processed list %s' % page.url)
        print(
            'Processed %s urls! Fetching movies & getting more movie urls...'
            % processed)
コード例 #4
0
def create_book_page(page_text, page_image, email):
    """Create and persist one page of the book owned by *email*."""
    new_page = Page(
        text=page_text,
        image=page_image,
        book_id=get_book_id(email),
    )

    db.session.add(new_page)
    db.session.commit()

    return new_page
コード例 #5
0
 def add_page(self):
     """Create a child page from the request params; answer 400 without a type.

     The ordering defaults to appending after the current children of the
     master key.
     """
     params = self.request.params
     position = params.get('order', Page.get_children_count(self.master_key))
     parent_key = self._get_parent_key()
     requested_type = params.get('type', None)
     if requested_type:
         new_page = Page(parent=parent_key,
                         page_type=requested_type,
                         order=int(position))
         self._get_page(new_page)
     else:
         self.error(400)
コード例 #6
0
def create_cover_page(page_text, cover_image, email):
    """Create and persist the cover page for the most recently created book.

    Falls back to book_id 0 when no books exist.

    NOTE(review): *email* is accepted but unused, mirroring the original
    signature — confirm whether the book should be looked up by owner.
    """
    # Bug fix: the old loop bound the whole result Row (a tuple) to book_id
    # instead of the scalar id, and scanned every row just to keep the last.
    # Query the latest book id directly.
    last_row = db.session.query(Book.id).order_by(Book.id.desc()).first()
    book_id = last_row[0] if last_row else 0

    cover_page = Page(text=page_text, cover_image=cover_image, book_id=book_id)

    db.session.add(cover_page)
    db.session.commit()

    return cover_page
コード例 #7
0
    def getPage(self, article, oldid=None):
        """Return the page object for *article*.

        Unknown articles yield a NoPage placeholder; a truthy *oldid* selects
        a historical revision, otherwise the current text is returned.
        """
        global wikiDatabase
        if article not in wikiDatabase:
            return NoPage(article=article, controller=self)

        entry = wikiDatabase[article]
        if oldid:
            # Revisions live at entry[1], keyed by integer revision id;
            # each revision stores its wikitext at index 0.
            return OldPage(article=article,
                           wikitext=entry[1][int(oldid)][0],
                           controller=self)

        return Page(article=article,
                    wikitext=entry[0],
                    controller=self)
コード例 #8
0
ファイル: server.py プロジェクト: atrnh/portfolio-cms
def add_page():
    """Add a Page from the JSON request body and return it as JSON."""
    payload = json.loads(request.data.decode())

    new_page = Page(payload.get('title'), payload.get('content'))

    db.session.add(new_page)
    db.session.commit()

    return get_page_json(new_page.id)
コード例 #9
0
ファイル: server.py プロジェクト: ellakcd/site_maker
def update_pages():
    """Replace the current user's displayed pages with the submitted set."""
    user_id = session["current_user"]
    user = User.query.get(user_id)

    selected_pages = request.form.getlist("pages")
    print(selected_pages)

    # Wipe the previous selection before inserting the new one.
    Page.query.filter_by(user_id=user_id).delete()

    for page_name in selected_pages:
        db.session.add(Page(user_id=user_id, page=page_name))
    db.session.commit()

    print(user.pages)

    return redirect("users/{}/my_homepage".format(user_id))
コード例 #10
0
ファイル: admin.py プロジェクト: zouchao2010/cakeshop
    def post(self):
        # Create a static page from the submitted form fields and redirect
        # back to the admin page list; on validation/save failure, flash the
        # error instead.  (Python 2 syntax: `except Exception, ex`.)
        name = self.get_argument("name", None)
        slug = self.get_argument("slug", None)
        content = self.get_argument("content", "")
        template = self.get_argument("template", "staticpage.html")

        # Build the page field-by-field so validate() sees the full object.
        page = Page()
        page.name = name
        page.slug = slug
        page.content = content
        page.template = template

        try:
            page.validate()
            page.save()
            # Flash message is user-facing ("section %s added successfully").
            self.flash(u"栏目%s添加成功" % name)
            self.redirect("/admin/pages")
            return
        except Exception, ex:
            # NOTE(review): catching every exception here masks programming
            # errors as flash messages — confirm this is intended.
            self.flash(str(ex))
コード例 #11
0
ファイル: crawler.py プロジェクト: meteo-rain/ptt
    def request_page(self, url):
        """Return the raw content of *url*, serving from the on-disk cache.

        A cache hit (latest DB entry whose file still exists) is returned
        directly.  Otherwise the URL is fetched (throttled to one request per
        0.5 s), stored on disk under its content hash, recorded in the
        crawler DB, and returned.  Failed requests yield '<html></html>'.
        """
        global last_requst_time
        import hashlib
        from sqlalchemy import desc

        cached = self.__session_crawler.query(Page).filter_by(
            url=url).order_by(desc(Page.mtime)).first()
        if cached and os.path.isfile(
                os.path.join(self.__data_dir, cached.file_path)):
            # Fix: close the cache file instead of leaking the handle.
            with open(os.path.join(self.__data_dir, cached.file_path),
                      'rb') as cache_file:
                return cache_file.read()

        now = datetime.datetime.now()
        print('[{0}] Request {1}'.format(now.strftime("%Y-%m-%d %H:%M:%S.%f"),
                                         url))
        # Fix: .microseconds only holds the sub-second component, so deltas
        # over one second looked "recent" and slept needlessly; use the full
        # elapsed time instead.
        elapsed = (now - last_requst_time).total_seconds()
        if elapsed < 0.5:  # throttle: at most one request per 0.5s
            time.sleep(0.5 - elapsed)
        r = requests.get(url, cookies=self.get_cookies(url))

        last_requst_time = now
        if r is None or r.status_code != 200 or r.content is None:
            # NOTE(review): every other path returns bytes, this returns str —
            # confirm how callers compare/parse the result.
            return '<html></html>'

        content_hash = hashlib.md5(r.content).hexdigest()
        file_path = os.path.abspath(
            os.path.join(self.__data_dir,
                         self.url_to_file_path(url, content_hash)))
        # Fix: only "directory already exists" should be ignored; the old bare
        # except also swallowed permission and disk errors.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        page = Page(url=url,
                    size=len(r.content),
                    file_path=file_path,
                    content_hash=content_hash,
                    mtime=now)
        self.__session_crawler.add(page)
        self.__session_crawler.commit()
        # Fix: write through a context manager so the file is flushed/closed.
        with open(os.path.join(self.__data_dir, file_path), 'wb') as out:
            out.write(r.content)
        return r.content
コード例 #12
0
    def start_crawler(self):
        # Crawl self.website with a pool of worker threads, seeding the queue
        # with the root URL, and print timing/throughput stats when done.
        start = time.time()

        # read robots.txt
        tmp = "http://" + self.base + "/robots.txt"
        self.robot_parser.set_url(tmp)
        self.robot_parser.read()

        # put first link: priority 0 so the root is processed first, and a
        # matching empty Page row is committed up front.
        self.q.put((0, self.website))
        new_page = Page(url=self.website, text="", rank=0)
        session.add(new_page)
        session.commit()

        # Daemon workers so a hung thread cannot block interpreter exit.
        threads = []
        for x in range(self.threads_number):
            t = threading.Thread(target=self.worker)
            t.daemon = True
            threads.append(t)
            t.start()

        # wait until the queue becomes empty
        self.q.join()

        # join threads: one None sentinel per worker tells it to exit
        # (the worker presumably treats None as a poison pill — see worker()).
        for i in range(self.threads_number):
            self.q.put(None)
        for t in threads:
            t.join()

        # Flush any work the threads left uncommitted on the shared session.
        session.commit()

        # empty the queue (drops the leftover None sentinels)
        self.q.queue.clear()

        end = time.time()
        print("With", self.threads_number, "threads elapsed : ", end - start)
        print("Total number of pages processed :",
              self.current_pages_processed)
コード例 #13
0
    def init(self):
        # Seed the processing queue: from the configured root URLs when the
        # database is empty, otherwise resume every not-yet-processed page.
        # Then wait for the queue to drain and build the search index.
        pages_added = 0
        with session_scope(self.Session) as session:
            if session.query(Page).count() == 0:
                # Fresh database: create a Page per configured root node.
                for i in self.config.ROOT_NODES:
                    page = Page(url=i)
                    session.add(page)
                    session.commit()  # assign page.page_id before queueing
                    self.queue.add_page(page.page_id)
                    pages_added += 1
            else:
                # Resume: re-queue everything not fully processed.
                for page in session.query(Page).filter(Page.state != Page.State.PROCESSED).all():
                    self.queue.add_page(page.page_id)
                    pages_added += 1
            # Detach all instances so they outlive the session scope.
            session.expunge_all()

        if pages_added != 0:
            # NOTE(review): this message is also printed when resuming an
            # existing (non-empty) database, where "No movie data in our
            # system" is misleading — confirm whether it should only appear
            # on the fresh-database branch.
            print('No movie data in our system. We need to scrape IMDB for data...')
            print('Started pipeline! Added %s root pages to processing queue' % pages_added)
            self.queue.join()
            print("Finished processing!")
        self.search_module.build_index()
コード例 #14
0
from flask_sqlalchemy import SQLAlchemy

from model import Page, User, Role, Image
from setting import app

db = SQLAlchemy(app)


# db.drop_all()
# db.create_all()

# Seed script: create the homepage record once, skipping if a page titled
# 'homepage' already exists.
page = Page()
x = page.query.filter_by(title='homepage').first()  # None when not yet seeded
if x is None:
    page.title = 'homepage'
    # Contents are Indonesian: "Welcome to the world of python".
    page.contents = '<h1> Selamat datang di dunia python </h1>'
    page.is_homepage = True
    db.session.add(page)
    db.session.commit()

# x1=page.query.filter_by(title='Hallo Dunia').first()
# if x1 is None:
#     page.title='Hallo Dunia'
#     page.contents='<h1> Hallo Dunia ? apa kabar... </h1>'
#     page.is_homepage=False
#     page.url='page/hallo-dunia'
#     page.image_id=1
#     db.session.add(page)
#     db.session.commit()

# NOTE(review): script continues beyond this view; gbr's usage is not visible.
gbr = Image()