Beispiel #1
0
def retrieve_page(url, add_if_not_found=True):
    """Look up (or create) the Page for *url* and queue its follow-up tasks.

    Args:
        url: Address of the page to retrieve.
        add_if_not_found: When true, a missing page is created through
            ``Page.add_page``; otherwise ``pagenotfound()`` is returned.

    Returns:
        ``None`` once the follow-up tasks are queued or the corpus limit has
        been reached; the result of ``pagenotfound()`` when the page does
        not exist and must not be created.
    """
    # Parenthesised form works as a statement on Python 2 and as a function
    # call on Python 3; the one-element tuple keeps %-formatting safe.
    print("retrieving Page for ....%s" % (url,))

    # We run in a Celery context rather than a Flask one, so enter a
    # throw-away request context to run the request preprocessing.
    with app.test_request_context('/'):
        app.preprocess_request()

    if Page.get_total_num_of_pages() > app.config['MAX_CORPUS_SIZE']:
        # The corpus is full, so crawling has to stop.  Broadcasting
        # 'shutdown' was tried earlier, but that shuts down Celery as a
        # whole and therefore also stops Extraction population, breaking the
        # one-to-one relation between pages and extractions.  Instead we
        # only stop consuming from the "retrieve" queue, which is the queue
        # app.tasks.retrieve_page is configured for; the dependent tasks
        # live on a different queue and still go through.
        # celery.control.add_consumer("retrieve") has to be issued before
        # this task can be consumed again.
        celery.control.cancel_consumer("retrieve")
        return

    page = Page.get_page_by_url(url)
    if page is None:
        if not add_if_not_found:
            return pagenotfound()
        page = Page.add_page(url)

    retrieve_extraction.delay(page.id)
    find_links.delay(page.id)
    
    #retrieve_extraction.delay(page.id)
    # The reason this was commented out was that the boilerpipe_extract_and_populate task was getting
    # overwhelmed because the page population was growing so fast.
    # New approach: first populate 1000 pages, then stop page population and start the extraction process.
    
    #Using Rest API
    ''''r = requests.get("http://127.0.0.1:5000/pages", params={"url":url})
Beispiel #2
0
def find_links(page_id):
    """Queue a ``retrieve_page`` task for every link of the given page.

    Returns the result of ``pagenotfound()`` when ``Page.find_links`` yields
    no page; otherwise returns ``None`` after queuing the tasks.
    """
    # Running under Celery, not Flask: enter a temporary request context so
    # the request preprocessing can run.
    with app.test_request_context('/'):
        app.preprocess_request()

    found = Page.find_links(page_id)
    if found is None:
        return pagenotfound()

    for target in found.links:
        retrieve_page.delay(target)
 def test_check_rate_request_is_required(self):
     """A query string without ``origin`` must be rejected."""
     query = '/?destination=NOGJM&date_from=2016-01-01&date_to=2016-01-30'
     with app.test_request_context(query):
         app.preprocess_request()
         try:
             check_rate_request()
         except UnprocessableEntity as err:
             assert err.description.lower() == "origin is required"
         else:
             # Reaching this point means no validation error was raised.
             assert False
 def test_check_rate_request_success(self):
     """A complete query string is returned as its four parsed fields."""
     query = '/?origin=CNNBO&destination=NOGJM&date_from=2016-01-01&date_to=2016-01-30'
     with app.test_request_context(query):
         app.preprocess_request()
         origin, destination, date_from, date_to = check_rate_request()
         # Check the fields in the order the function returns them.
         checks = (
             (origin, "CNNBO"),
             (destination, "NOGJM"),
             (date_from, "2016-01-01"),
             (date_to, "2016-01-30"),
         )
         for actual, wanted in checks:
             assert actual == wanted
Beispiel #5
0
 def test_hook(self):
     """After request preprocessing, storage holds a current ``nowtime`` entry."""
     with app4.test_request_context("/"):
         app4.preprocess_request()
         storage = LocalStorage()
         self.assertTrue("nowtime" in storage.list)
         # Compare only down to the hour so the check does not depend on
         # the exact minute/second at which the value was stored.
         hour_prefix = time.strftime("%Y-%m-%d %H:", time.localtime())
         self.assertIn(hour_prefix, storage.get("nowtime"))
         # Remove the entry again once it has been checked.
         del storage["nowtime"]
 def test_check_rate_request_is_date_range(self):
     """A ``date_to`` earlier than ``date_from`` must be rejected."""
     query = '/?origin=CNNBO&destination=NOGJM&date_from=2016-01-31&date_to=2016-01-20'
     with app.test_request_context(query):
         app.preprocess_request()
         try:
             check_rate_request()
         except UnprocessableEntity as err:
             message = err.description.lower()
             assert message == 'to_date should be higher than from'
         else:
             # Reaching this point means no validation error was raised.
             assert False
Beispiel #7
0
 def test_d_del(self):
     """Deleting accommodation 1 after logging in must report success.

     The test fails when ``self.login()`` returns a falsy user id.  It used
     to ``return False`` in that case, which test runners count as a pass
     and therefore hid broken logins.
     """
     with app.test_request_context("/c2c/del_one_accommodation/1"):
         user_id = self.login()
         app.preprocess_request()
         assert user_id, "login failed; cannot exercise del_one_accommodation"
         rv = self.app.post('/c2c/del_one_accommodation/1')
         data = rv.get_json()
         print("accommodation return ", data, "\n")
         assert data['success']
Beispiel #8
0
    def test_c_userinfo(self):
        """The userinfo response must not contain a ``success`` key."""
        credentials = {
            'email': "*****@*****.**",
            'password': '******',
        }
        with app.test_request_context("/c2c/userinfo"):
            # Log in first, then run the request preprocessing.
            self.app.post('/c2c/login', json=credentials)
            app.preprocess_request()

            response = self.app.get('/c2c/userinfo')
            payload = response.get_json()
            print("userinfo return ", payload, "\n")
            assert "success" not in payload
 def test_check_rate_request_is_date_formatted(self):
     """A date that is not in yyyy-mm-dd form must be rejected."""
     wrong_date = '2016-31-01'
     expected_message = '%s should formatted in yyyy-mm-dd' % wrong_date
     query = '/?origin=CNNBO&destination=NOGJM&date_from=%s&date_to=2016-01-30' % wrong_date
     with app.test_request_context(query):
         app.preprocess_request()
         try:
             check_rate_request()
         except UnprocessableEntity as err:
             assert err.description.lower() == expected_message
         else:
             # Reaching this point means no validation error was raised.
             assert False
Beispiel #10
0
    def test_e_passwordchange(self):
        """Changing the password after logging in must report success."""
        credentials = {
            'email': "*****@*****.**",
            'password': '******',
        }
        new_password = {'password': "******"}
        with app.test_request_context("/c2c/userinfo"):
            # Log in first, then run the request preprocessing.
            self.app.post('/c2c/login', json=credentials)
            app.preprocess_request()

            response = self.app.post('/c2c/changepassword', json=new_password)
            payload = response.get_json()
            print("password change return ", payload, "\n")
            assert payload['success']
Beispiel #11
0
    def test_d_userupdate(self):
        """Updating the profile after logging in must report success."""
        credentials = {
            'email': "*****@*****.**",
            'password': '******',
        }
        profile = {
            'nickname': 'testuser',
            'phone': "111111",
            "name": "bbbn",
            "id_card": "3422712732376",
        }
        with app.test_request_context("/c2c/userinfo"):
            # Log in first, then run the request preprocessing.
            self.app.post('/c2c/login', json=credentials)
            app.preprocess_request()

            response = self.app.post('/c2c/userupdate', json=profile)
            payload = response.get_json()
            print("userupdate return ", payload, "\n")
            assert payload['success']
Beispiel #12
0
    def setUp(self):
        """Start a nested transaction and optionally push a request context.

        A nested transaction is opened on ``self.connection`` and
        ``db.session`` is replaced by a scoped session bound to that same
        connection.  When ``CREATE_DEFAULT_APP_CONTEXT`` is truthy, a test
        request context for ``/testing`` is pushed and the request
        preprocessing is run.  A cleanup callback rolls the transaction back
        and pops the context again.
        """
        def cleanup_db():
            # Undo the nested transaction, then pop the request context if
            # one was pushed.
            self.current_transaction.rollback()
            if self.app_context:
                self.app_context.pop()

        # NOTE(review): the cleanup is registered before the attributes it
        # reads are assigned below; if begin_nested() raises, cleanup_db
        # would presumably fail on the missing attribute - confirm this is
        # acceptable.
        self.addCleanup(cleanup_db)

        self.current_transaction = self.connection.begin_nested()
        # Rebind the shared session to this test's connection.
        # NOTE(review): the previous db.session is not restored in cleanup.
        db.session = orm.scoped_session(orm.sessionmaker(bind=self.connection))

        if self.CREATE_DEFAULT_APP_CONTEXT:
            self.app_context = app.test_request_context('/testing')
            self.app_context.push()
            app.preprocess_request()
        else:
            # Nothing was pushed, so cleanup_db has nothing to pop.
            self.app_context = None
Beispiel #13
0
def retrieve_extraction(page_id, add_if_not_found=True):
    """Look up (or create) the Extraction that belongs to *page_id*.

    Args:
        page_id: Identifier of the page whose extraction is wanted.
        add_if_not_found: When true, a missing extraction is created through
            ``Extraction.add_extraction``; otherwise the not-found response
            is returned.

    Returns:
        The result of ``extractionnotfound()`` when the extraction does not
        exist and must not be created; otherwise ``None``.
    """
    # Parenthesised form works as a statement on Python 2 and as a function
    # call on Python 3; the one-element tuple keeps %-formatting safe.
    print("retrieving Extraction for ....%s" % (page_id,))

    # We run in a Celery context rather than a Flask one, so enter a
    # throw-away request context to run the request preprocessing.
    with app.test_request_context('/'):
        app.preprocess_request()

    extraction = Extraction.get_extraction_by_page_id(page_id)
    if extraction is None:
        if not add_if_not_found:
            # Call the handler, matching how pagenotfound() is used by the
            # page tasks; the bare name used to be returned uncalled.
            return extractionnotfound()
        extraction = Extraction.add_extraction(page_id)
    # We will not do the boilerpipe extraction here; we simply create the extraction record.
    # A separate process then runs boilerpipe, taking page.id and extraction.id.
    #boilerpipe_extract_and_populate.delay(page_id,extraction.id)
    
    #Using Rest API
    '''rExt = requests.get("http://127.0.0.1:5000/extractions", params={"page_id":page_id})
Beispiel #14
0
 def test_a_publish(self):
     """Publishing an accommodation after logging in must report success.

     The test fails when ``self.login()`` returns a falsy user id.  It used
     to ``return False`` in that case, which test runners count as a pass
     and therefore hid broken logins.
     """
     listing = {
         'acc_address': 'weihai',
         'acc_capacity': 300,
         'acc_price': "6000",
         'acc_city': 1,
         'acc_description': "nothing",
         'acc_user_id': 3,
         'acc_type_id': 1,
     }
     with app.test_request_context("/c2c/userinfo"):
         user_id = self.login()
         app.preprocess_request()
         assert user_id, "login failed; cannot publish an accommodation"
         rv = self.app.post('/c2c/accommodation/add', json=listing)
         data = rv.get_json()
         print("accommodation return ", data, "\n")
         assert data['success']
Beispiel #15
0
def boilerpipe_extract_and_populate(page_id=None, ext_id=None):
    """Request a boilerpipe extraction over REST for every stored page.

    Args:
        page_id: Unused; kept so existing callers keep working.
        ext_id: Unused; kept so existing callers keep working.

    Returns:
        ``None``.

    Pages without an extraction record are skipped, because there is no
    extraction id to put into the request URL.
    """
    # Parenthesised form works on both Python 2 and Python 3.
    print("extracting using boilerpipe...")

    # History: calling BoilerpipeExtraction.extract_content(page_id, ext_id)
    # directly did not work for some reason, so the REST endpoint is used.
    # The first REST variant issued a single request for (page_id, ext_id);
    # the current approach walks over all pages instead.

    # We run in a Celery context rather than a Flask one, so enter a
    # throw-away request context to run the request preprocessing.
    with app.test_request_context('/'):
        app.preprocess_request()

    for page in Page.get_all_pages():
        if page is None:
            continue
        extraction = Extraction.get_extraction_by_page_id(page.id)
        if extraction is None:
            # The lookup can return None; dereferencing .id would raise.
            continue
        requests.get("http://127.0.0.1:5000/extractions/bp/%s,%s" % (page.id, extraction.id))
    return
Beispiel #16
0
def exposed_fetch_unmapped_qidian_items():
    """Print lookup entries for series of RSS feed 2578 that were missed.

    Feed posts are grouped by the first five ``/``-separated parts of their
    content URL, the lowest-id post of each group is run through
    ``proto_process_releases``, and for every distinct series URL among the
    ``missed`` results the page title is fetched and printed, unless the
    title contains one of a few known junk names.
    """
    from app import app
    from flask import g

    def series_root(address):
        # Keep the first five "/"-separated parts and close with a slash.
        return "/".join(address.split("/")[:5]) + "/"

    # A request context is used here (rather than app.app_context()).
    with app.test_request_context(""):
        app.preprocess_request()

        print("Querying for rss feed items.")
        # The feed id is hard-coded for the original author's database.
        query = g.session.query(db.RssFeedPost)
        releases = query.filter(db.RssFeedPost.feed_id == 2578).all()

        print("Processing items")
        by_series = {}
        for post in releases:
            if "/rssbook/" in post.contenturl:
                continue
            by_series.setdefault(series_root(post.contenturl), []).append(post)

        print("Fetched %s urls, %s distinct series" % (len(releases), len(by_series)))

        # One representative per series: the post with the lowest id.
        truncated_releases = [
            min(group, key=lambda post: post.id)
            for group in by_series.values()
        ]

        print("Truncated releases: %s" % len(truncated_releases))

        items = proto_process_releases(truncated_releases)
        missed = items['missed']
        print("Processing resulted in %s feed items" % len(missed))

        new_series_urls = list({series_root(entry[1]['linkUrl']) for entry in missed})
        print("Releases consolidated to %s distinct series" %
              len(new_series_urls))

    bad_names = (
        '12testett11223355',
        'webnovel test003',
        'www.webnovel.com',
    )
    wg = WebRequest.WebGetRobust()
    for url in new_series_urls:
        meta = common.management.util.get_page_title(wg, url)
        title = meta['title']
        if any(junk in title for junk in bad_names):
            continue

        padding = " " * (50 - len(url))
        print('Missing: "%s" %s: "%s",' % (url, padding, meta))

        # The item id is the last non-empty path segment of the URL.
        segments = [part for part in url.split("/") if part]
        itemid = segments[-1]

        if 'is-orig' in meta and meta['is-orig']:
            kind = 'oel'
        else:
            kind = 'translated'
        print(
            "'%s' : ('%s',                                                                     '%s'),"
            % (itemid, title.strip(), kind))
Beispiel #17
0
import os
import sys
import readline
import random
import datetime
from pprint import pprint as p

import ujson

from flask import *

from app import app, db
from app.orders.models import *
from app.taxi.models import *


# Setting PYTHONINSPECT from inside the script makes the interpreter enter
# interactive mode once the script finishes.
os.environ["PYTHONINSPECT"] = "True"

# Disabled helper for dropping and recreating all tables:
# def flushall():
#     db.drop_all()
#     db.create_all()


# Push a test request context and run the request preprocessing.  The
# context is pushed and not popped here, so it stays active after the
# script body ends.
ctx = app.test_request_context()
ctx.push()
app.preprocess_request()