def _parse_cli_args(argv):
    """Return ``(pages, search_str)`` parsed from an ``sys.argv``-style list.

    Accepted forms are ``prog SEARCH_TEXT`` (page count defaults to 2) and
    ``prog PAGES SEARCH_TEXT``. Exits with a usage message on stderr when the
    search text is missing, instead of failing with a bare ``IndexError``.
    """
    usage = "usage: %s [PAGES] SEARCH_TEXT" % (argv[0] if argv else "main")
    if len(argv) < 2:
        sys.exit(usage)

    first = argv[1].rstrip()
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse,
    # so inputs such as "²" are treated as search text rather than crashing.
    if not first.isdecimal():
        return 2, first

    if len(argv) < 3:
        # A page count was given but nothing to search for.
        sys.exit(usage)
    return int(first), argv[2].rstrip()


def main():
    """Scrape Alibaba search result pages and print the items as JSON.

    Command line: ``[PAGES] SEARCH_TEXT``. Pages 1..PAGES of the search for
    SEARCH_TEXT are fetched through ``PageScrapper`` instances that share a
    single ``SessionManager``; every page's ``result`` is merged into one list
    that is printed as indented, key-sorted, non-ASCII-preserving JSON.

    Raises:
        SystemExit: if the required command line arguments are missing.
    """
    search_url = "https://russian.alibaba.com/trade/search?fsb=y&IndexArea=product_en&SearchText="

    # Retrieve command line args
    pages, search_str = _parse_cli_args(sys.argv)

    merged_result = []
    sessionManager = SessionManager()
    search_url = search_url + parse.quote_plus(search_str) + '&page='

    # Iterate over pages
    for i in range(1, pages + 1):
        scrapper = PageScrapper(search_url + str(i))
        # All page scrapers reuse the same session manager.
        scrapper.sessionManager = sessionManager
        scrapper.get_page()
        scrapper.get_items()
        # Flatten as we go: each page contributes its items to one list.
        merged_result.extend(scrapper.result)

    # Drop our reference before printing, as the original did.
    # NOTE(review): presumably this lets SessionManager release its
    # resources via its finalizer - confirm.
    del sessionManager

    print(
        json.dumps(merged_result, indent=4, sort_keys=True,
                   ensure_ascii=False))
def main(secondsAgo=None, year=None, exclude=None, reinit=False):
    """Run the CCE/CCR load followed by the index update.

    Args:
        secondsAgo: When not None, the load window starts this many seconds
            before the current time; the resulting timestamp is passed to
            the loaders and to ``indexUpdates``. When None, ``None`` is
            passed instead (presumably meaning "no lower bound" - confirm
            against the loaders).
        year: Passed through unchanged to ``loadCCE`` and ``loadCCR``.
        exclude: ``'cce'`` skips ``loadCCE``; ``'ccr'`` skips ``loadCCR``;
            any other value runs both.
        reinit: Passed to ``manager.initializeDatabase``.

    The connection is closed even when a load step raises, so a failed run
    does not leave the session open.
    """
    manager = SessionManager()
    manager.generateEngine()
    manager.initializeDatabase(reinit)
    manager.createSession()

    try:
        loadFromTime = None
        startTime = datetime.now()
        if secondsAgo is not None:
            loadFromTime = startTime - timedelta(seconds=secondsAgo)

        if exclude != 'cce':
            loadCCE(manager, loadFromTime, year)
        if exclude != 'ccr':
            loadCCR(manager, loadFromTime, year)

        indexUpdates(manager, loadFromTime)
    finally:
        # Always release the session/connection, including on failure.
        manager.closeConnection()
# Session config app.config['SESSION_TYPE'] = 'mongodb' app.config['SESSION_MONGODB'] = mongoClient app.config['SESSION_MONGODB_DB'] = "citations-app2" app.config['SESSION_MONGODB_COLLECT'] = "sessions" app.config['SESSION_COOKIE_SECURE'] = True app.config['SESSION_COOKIE_HTTPONLY'] = True app.config['SESSION_PERMANENT'] = True # For a 1 minute validity session, set minutes=121 (date is not the same format before database & this API) app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(minutes=122) # if SESSION_USE_SIGNER set to True, you have to set flask.Flask.secret_key app.config['SESSION_USE_SIGNER'] = True Session(app) sessMgr = SessionManager(mongo) mailMgr = MailManager() ph = PasswordHasher() ''' HEADERS X-Frame-Options : éviter les attaques de clickjacking pour s'assurer que le contenu ne soit pas embarqué dans d'autres sites X-Content-Type-Options est un marqueur utilisé par le serveur pour indiquer que les types MIME annoncés dans les en-têtes Content-Type ne doivent pas être modifiés ou et suivis ''' httpResponseHeaderOptions = { 'X-Frame-Options': 'sameorigin', 'X-Content-Type-Options': 'nosniff' } @app.route('/', methods=['GET', 'POST'])