def cli():
    """command-line interface"""
    arguments = get_arg(docopt(__doc__))
    url = get_url(arguments)
    headers = get_head()
    try:
        response = requests.get(url, verify=False, headers=headers)
        logger.debug(response)
    except requests.exceptions.RequestException:
        logger.error('Timeout error!')
        exit()
    if response.status_code == requests.codes.ok:
        try:
            res_json = response.json()
        except ValueError:
            logger.warning('JSON parse failed. Try again.')
            exit()
        else:
            logger.debug(res_json)
        if res_json['status']:
            rows = res_json['data']  # first-level parsing
            # second-level parsing: build the trains object
            trains = TrainCollection(rows, arguments)
            try:
                trains.pretty_print()
            except Exception:
                logger.warning('prettytable print failed.')
                exit()
        else:
            logger.error('Result not found. Please check the log.')
def get_args_default_test(self):
    """Test that the default is returned if the env variable does not exist."""
    foo = get_arg(env='UNNECESSARILY_COMPLICATED_ENV_VAR', default='bar')
    assert_equal(foo, 'bar')

def get_args_not_empty_test(self):
    """Test that the env variable's value is returned if it exists."""
    os.environ['FOO'] = 'foo'
    foo = get_arg(env='FOO', default='')
    assert_equal(foo, 'foo')

def get_args_empty_test(self):
    """Test that the empty default is returned if the env variable does not exist."""
    foo = get_arg(env='FOO', default='')
    assert_equal(foo, '')
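# The three tests above pin down one behaviour for this helper: get_arg(env=...,
# default=...) returns the environment variable's value when it is set and the
# default otherwise. A minimal sketch that would satisfy them (the project's
# real get_arg may do more, e.g. type coercion or logging):
import os

def get_arg(env, default):
    """Return the value of the environment variable `env`, or `default` if unset."""
    return os.environ.get(env, default)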
def main():
    # Basic configuration
    data_dir = utils.ensure_path('~/data_analysis/fcb/')
    # group_id = 153748404666241
    group_id = 597682743580084

    # Load basic arguments
    log("Parsing basic arguments")
    missing_arg = utils.check_required_arg(utils.login_opt, utils.password_opt,
                                           utils.ch_driver_opt)
    if missing_arg is not None:
        utils.exit_program('Missing required argument ' + missing_arg)
    login = utils.get_arg(utils.login_opt)
    password = utils.get_arg(utils.password_opt)
    driver_path = utils.get_arg(utils.ch_driver_opt)

    log('Starting browser')
    browser = webdriver.Chrome(executable_path=driver_path)
    scrapper = FcbBrowserScrapper(browser, login, password)
    scrapper.log_in()

    # Scrape
    users = scrapper.scrap_group_members(group_id)

    # Temporary code for loading users from a file rather than from the web,
    # to speed up development
    # users_file = utils.ensure_path('~/data_analysis/mlyny_group.txt')
    # utils.save_data(users, users_file)
    # users = []
    # with open(users_file, encoding='utf-8') as f:
    #     for line in f:
    #         if line == '':
    #             break
    #         s = line.split(',')
    #         users.append((s[0], s[1]))

    # Process
    scrapper.process_users(users, data_dir)

    log('Closing the browser')
    browser.close()
    log('The end')
def factory():
    """Method to return a concrete instance of a `Cache` store as specified
    by the environment variables.

    Possible cache backends include memory and jdg/infinispan. Any other
    value will fall back to a memory cache.

    :return: A concrete instance of a `Cache` store.
    :rtype: Cache
    """
    CACHE_TYPE = get_arg('CACHE_TYPE', 'memory')
    CACHE_HOST = get_arg('CACHE_HOST', '')
    CACHE_PORT = get_arg('CACHE_PORT', '')
    CACHE_NAME = get_arg('CACHE_NAME', '')

    if CACHE_TYPE == 'jdg' or CACHE_TYPE == 'infinispan':
        return InfinispanCache(host=CACHE_HOST, name=CACHE_NAME,
                               port=CACHE_PORT)
    else:
        return MemoryCache()
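# Usage sketch for factory() above: the backend is chosen purely from the
# environment before the call. The host/port/name values here are placeholders,
# not defaults taken from this project.
import os

os.environ['CACHE_TYPE'] = 'infinispan'
os.environ['CACHE_HOST'] = 'cache.example.com'
os.environ['CACHE_PORT'] = '11222'
os.environ['CACHE_NAME'] = 'recommendations'
cache = factory()  # any CACHE_TYPE other than 'jdg'/'infinispan' yields MemoryCache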
def loop(request_q, response_q):
    """processing loop for predictions

    This function is meant to be used as the main loop for a process, it
    will wait for new requests on the request_q queue and write responses
    on the response_q queue.
    """
    # get the model store backend
    # if none provided the default is `mongodb://localhost:27017`
    MODEL_STORE_URI = get_arg('MODEL_STORE_URI', 'mongodb://localhost:27017')
    # get the minimum interval (in milliseconds) between model store checks for
    # updated models. default is one minute (at the limit, for a check at every
    # request, set to `0`). cast to int since env values arrive as strings.
    MODEL_STORE_CHECK_RATE = int(get_arg('MODEL_STORE_CHECK_RATE', 60000))

    # just leaving these here for future reference (elmiko)
    spark = pysql.SparkSession.builder.appName("JiminyRec").getOrCreate()
    # load the local jar file we will need
    localjar = os.path.join(os.environ['PWD'], 'libs',
                            'spark-als-serializer_2.11-0.2.jar')
    loader = spark._jvm.Thread.currentThread().getContextClassLoader()
    url = spark._jvm.java.net.URL('file:' + localjar)
    loader.addURL(url)
    # get the SparkContext singleton from the JVM (not the pyspark API)
    context = spark._jvm.org.apache.spark.SparkContext.getOrCreate()
    context.addJar(localjar)
    print('------------------- loading jar -------------------------------')
    print(url)
    # get a context (from the pyspark API) to do some work
    sc = spark.sparkContext

    # load the latest model from the model store
    model_reader = storage.ModelFactory.fromURL(sc=sc, url=MODEL_STORE_URI)
    model = model_reader.readLatest()
    # last time the model store was checked
    last_model_check = datetime.datetime.now()

    response_q.put('ready')  # let the main process know we are ready to start

    # acquire logger
    _logger = logger.get_logger()

    while True:
        # calculate how much time elapsed since the last model check
        current_time = datetime.datetime.now()
        model_check_delta = current_time - last_model_check
        # if the model check was performed longer ago than the check rate threshold
        if model_check_delta.total_seconds() * 1000 >= MODEL_STORE_CHECK_RATE:
            # check for new models in the model store
            latest_id = model_reader.latestId()
            if model.version != latest_id:
                model = model_reader.read(version=latest_id)
                # invalidate the cache, since we are using a new model
                response_q.put('invalidate')
            last_model_check = current_time

        req = request_q.get()
        # stop the processing loop
        if req == 'stop':
            break

        resp = req
        # perform a top-k rating for the specified user prediction
        if 'topk' in req:
            # make rank predictions
            # check if we have a valid user
            if model.valid_user(req['user']):
                recommendations = model.als.recommendProducts(int(req['user']),
                                                              int(req['topk']))
                # update the cache store
                resp.update(products=[
                    {'id': recommendation[1], 'rating': recommendation[2]}
                    for recommendation in recommendations])
            else:
                _logger.error("Requesting rankings for invalid user id={}"
                              .format(req['user']))
                resp.update(products=[])
            response_q.put(resp)
        else:
            # make rating predictions
            items = sc.parallelize(
                [(req['user'], p['id']) for p in req['products']])
            predictions = model.als.predictAll(items).map(
                lambda x: (x[1], x[2])).collect()
            # update the cache store
            resp.update(products=[
                {'id': item[0], 'rating': item[1]} for item in predictions])
            response_q.put(resp)
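# Wiring sketch (not from the original source): loop() is meant to run as a
# worker process, with the two queues as its only interface. The protocol below
# ('ready', 'stop', 'invalidate', and dicts carrying 'user'/'topk'/'products')
# is the one implemented in loop() itself; the process setup is illustrative.
import multiprocessing as mp

request_q, response_q = mp.Queue(), mp.Queue()
worker = mp.Process(target=loop, args=(request_q, response_q))
worker.start()
assert response_q.get() == 'ready'        # model loaded, worker is ready
request_q.put({'user': 42, 'topk': 5})    # top-5 recommendations for user 42
print(response_q.get()['products'])
request_q.put('stop')                     # terminate the processing loop
worker.join()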
def _arg_authors(authors):
    if ':' in authors:
        start = int(authors.split(':')[0])
        end = int(authors.split(':')[1])
        authors = []
        while start <= end:
            authors.append(start)
            start += 1
        return authors
    try:
        return [int(author) for author in authors.split(',')]
    except ValueError:
        return None


if __name__ == '__main__':
    authors = _arg_authors(utils.get_arg('-a'))
    if not authors:
        msg = 'No author indices given.\n'
        msg += 'Usage:\n' \
               '  -a 1      (single author index)\n' \
               '  -a 1,2,5  (series of author indices)\n' \
               '  -a 1:100  (range of authors between indices)\n' \
               '  -fold expects a comma separated list of arff files\n' \
               '  with colon notation for their target class.\n' \
               '  example: result1.arff:1,result2.arff:1,result3.arff:2'
        sys.exit(msg)

    output = utils.OutputWriter()
    # Build training set or test set?
import requests, utils
from http.cookies import SimpleCookie
from http import cookies

# Logging of requests
# logging.basicConfig(level=logging.DEBUG)

url = 'http://www.facebook.com/login.php?login_attempt=1'

missing_arg = utils.check_required_arg(utils.login_opt, utils.password_opt)
if missing_arg is not None:
    utils.exit_program("Missing required argument " + missing_arg)

email = utils.get_arg(utils.login_opt)
password = utils.get_arg(utils.password_opt)
payload = {'email': email, 'pass': password}

resp_counter = 0


def log_response(response):
    print('Response:', response)
    indent = '\t'
    print(indent, 'Status:', response.status_code)
    if response.status_code != 200:
        print(indent, 'Redirect to:', response.headers['location'])
    # print(' Content: ', response.text)
    print(indent, 'Headers: ', response.headers)
    if 'set-cookie' in response.headers:
        print(indent, 'Cookies:')
def get_test_from_class(test_class):
    """Prepare test set from the provided csv file."""
    test = genfromtxt(open('data/' + test_class, 'r'), delimiter=',',
                      dtype='f8')
    return [x[:-1] for x in test]


def get_test_from_text(text):
    """Prepare test set from the provided text file."""
    writer = utils.OutputWriter()
    return writer.write_from_textfile(text)


if __name__ == '__main__':
    train_set = utils.get_arg('-traincsv', 'training_2-6.csv')
    test_class = utils.get_arg('-testcsv')
    test_text = utils.get_arg('-testtext')
    if not (test_text or test_class):
        sys.exit('Please provide either -testtext or -testcsv parameter.')

    rf = get_classifier(train_set)
    if test_class:
        test = get_test_from_class(test_class)
    else:
        test = get_test_from_text(test_text)

    data = rf.predict(test).tolist()
    result = {}
if utils.has_arg('--nosqrt'):
    diag[:, 2:] = diag[:, 2:] ** 0.5

annotate_cnt = 4
if utils.has_arg('--nonums'):
    annotate_cnt = 0

no_cut = False
if utils.has_arg('--nocut'):
    no_cut = True

if utils.has_arg('--stdlim'):
    limit = 0.25
    no_cut = True
else:
    tmp = utils.get_arg('--lim')
    if tmp:
        limit = float(tmp)
        no_cut = True
    else:
        if diag[diag[:, 3] != Inf, 3].size == 0:
            limit = 1.
        else:
            limit = max(max(diag[:, 2]), max(diag[diag[:, 3] != Inf, 3]))

margin = limit * 0.01
diag[diag[:, 3] == Inf, 3] = limit + margin * 0.9

if no_cut:
    cut_x = cut_y = cut = [1., 0.]
else:
    srt = sort(diag[:, 2])
@route('/article/<id:int>/edit')
@view('edit')
def edit(id):
    login_status = request.get_cookie('login_status', False, secret=SECRET_KEY)
    if login_status:
        article = c.execute('SELECT rowid, * FROM articles WHERE rowid == ?',
                            (id, )).fetchall()
        return dict(login_status=login_status, article=article[0])
    else:
        redirect('/login')


@post('/article/<id:int>/edit')
@view('message_page')
def do_edit(id):
    title = request.forms.title
    content = request.forms.content
    date = str(dt.datetime.now()).split('.')[0]
    c.execute(
        'UPDATE articles SET date = ?, title = ?, content = ? WHERE rowid == ?',
        ('最終更新日時:' + date, title, content, id))  # '最終更新日時:' = 'Last updated:'
    conn.commit()
    # message string means 'The article has been edited.'
    return dict(login_status=True, error=False, message='記事を編集しました。')


if __name__ == '__main__':
    host = get_arg(1, 'localhost')
    port = int(get_arg(2, 5000))
    web.open(f'http://{host}:{port}')  # open the app in the browser
    run(host=host, port=port, debug=True)
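# Note: the get_arg used just above is positional (it indexes the command line),
# unlike the env-var helper earlier in this section. A minimal sketch of what
# such a helper could look like (hypothetical, the real one may differ):
import sys

def get_arg(index, default):
    """Return sys.argv[index] if it was provided, otherwise the default."""
    return sys.argv[index] if len(sys.argv) > index else default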