Esempio n. 1
0
def cli():
    """Command-line interface: query the train API and pretty-print the result.

    Parses the docopt arguments, builds the request URL/headers, performs the
    HTTP GET, and renders the returned train data as a table.  Exits the
    process on any request or parse failure.
    """
    arguments = get_arg(docopt(__doc__))
    url = get_url(arguments)
    headers = get_head()
    try:
        # verify=False: the endpoint's certificate is not validated here
        response = requests.get(url, verify=False, headers=headers)
        logger.debug(response)
    except requests.exceptions.RequestException as exc:
        # Narrowed from a bare `except:` that mislabeled every failure as a
        # timeout; log the actual cause instead.
        logger.error('Request failed: %s', exc)
        exit()
    if response.status_code == requests.codes.ok:
        try:
            res_json = response.json()
        except ValueError:
            # Body was not valid JSON (requests raises ValueError here).
            logger.warning('JSON parse failed. Try again.')
            exit()
        else:
            logger.debug(res_json)
            if res_json['status']:
                rows = res_json['data']  # first-level parse: raw rows
                trains = TrainCollection(rows, arguments)  # second-level parse: build trains object
                try:
                    trains.pretty_print()
                except Exception:
                    logger.warning('prettytable print failed.')
                    exit()
            else:
                logger.error('Result not found. Please check the log.')
Esempio n. 2
0
    def get_args_default_test(self):
        """The default value must come back when the env variable is absent."""
        result = get_arg(env='UNNECESSARILY_COMPLICATED_ENV_VAR', default='bar')
        assert_equal(result, 'bar')
Esempio n. 3
0
    def get_args_not_empty_test(self):
        """The env variable's value must be returned whenever it is set."""
        os.environ['FOO'] = 'foo'
        result = get_arg(env='FOO', default='')
        assert_equal(result, 'foo')
Esempio n. 4
0
    def get_args_empty_test(self):
        """An empty string must come back when the env variable is unset."""
        result = get_arg(env='FOO', default='')
        assert_equal(result, '')
Esempio n. 5
0
def main():
    """Scrape the members of a Facebook group and process each user's data.

    Requires login, password and chromedriver-path command-line arguments;
    exits with a message if any is missing.  All output is written under
    ``~/data_analysis/fcb/``.
    """
    # Basic configuration
    data_dir = utils.ensure_path('~/data_analysis/fcb/')
    group_id = 597682743580084

    # Load basic arguments; abort early if a required one is missing.
    log("Parsing basic arguments")
    missing_arg = utils.check_required_arg(utils.login_opt, utils.password_opt, utils.ch_driver_opt)
    if missing_arg is not None:
        utils.exit_program('Missing required argument ' + missing_arg)
    login = utils.get_arg(utils.login_opt)
    password = utils.get_arg(utils.password_opt)
    driver_path = utils.get_arg(utils.ch_driver_opt)

    log('Starting browser')
    browser = webdriver.Chrome(executable_path=driver_path)

    scrapper = FcbBrowserScrapper(browser, login, password)
    scrapper.log_in()

    # Scrape the member list, then process every user into data_dir.
    users = scrapper.scrap_group_members(group_id)
    scrapper.process_users(users, data_dir)

    log('Closing the browser')
    browser.close()
    log('The end')
Esempio n. 6
0
def factory():
    """Build the concrete `Cache` store selected by environment variables.

    A ``CACHE_TYPE`` of ``jdg`` or ``infinispan`` yields an Infinispan-backed
    cache configured from ``CACHE_HOST``/``CACHE_PORT``/``CACHE_NAME``; any
    other value falls back to an in-memory cache.

    :return: A concrete instance of a `Cache` store.
    :rtype: Cache
    """
    cache_type = get_arg('CACHE_TYPE', 'memory')
    cache_host = get_arg('CACHE_HOST', '')
    cache_port = get_arg('CACHE_PORT', '')
    cache_name = get_arg('CACHE_NAME', '')

    if cache_type in ('jdg', 'infinispan'):
        return InfinispanCache(host=cache_host,
                               name=cache_name,
                               port=cache_port)
    return MemoryCache()
Esempio n. 7
0
def loop(request_q, response_q):
    """Processing loop for predictions.

    Meant to be run as the main loop of a worker process: blocks on
    ``request_q`` for incoming requests and writes responses to
    ``response_q``.  Puts the sentinel ``'ready'`` on ``response_q`` once
    startup completes, ``'invalidate'`` whenever a newer model is loaded
    from the store, and terminates when a request equal to ``'stop'``
    arrives.
    """

    # Model store backend URI.
    # If none is provided the default is `mongodb://localhost:27017`.
    MODEL_STORE_URI = get_arg('MODEL_STORE_URI', 'mongodb://localhost:27017')

    # Minimum interval (in milliseconds) between model store checks for
    # updated models. Default is one minute (at the limit, for a check at
    # every request, set to `0`).
    MODEL_STORE_CHECK_RATE = get_arg('MODEL_STORE_CHECK_RATE', 60000)

    spark = pysql.SparkSession.builder.appName("JiminyRec").getOrCreate()

    # Load the local serializer jar into the JVM's context class loader so
    # model (de)serialization can find its classes.
    localjar = os.path.join(os.environ['PWD'],
                            'libs',
                            'spark-als-serializer_2.11-0.2.jar')
    loader = spark._jvm.Thread.currentThread().getContextClassLoader()
    url = spark._jvm.java.net.URL('file:' + localjar)
    loader.addURL(url)
    # Get the SparkContext singleton from the JVM (not the pyspark API)
    # and register the jar with it as well.
    context = spark._jvm.org.apache.spark.SparkContext.getOrCreate()
    context.addJar(localjar)
    print('------------------- loading jar -------------------------------')
    print(url)

    # Get a context (from the pyspark API) to do some work.
    sc = spark.sparkContext

    # Load the latest model from the model store.
    model_reader = storage.ModelFactory.fromURL(sc=sc, url=MODEL_STORE_URI)

    model = model_reader.readLatest()

    # Last time the model store was checked.
    last_model_check = datetime.datetime.now()

    response_q.put('ready')  # let the main process know we are ready to start

    # Acquire logger.
    _logger = logger.get_logger()

    while True:

        # Calculate how much time elapsed since the last model check.
        current_time = datetime.datetime.now()
        model_check_delta = current_time - last_model_check
        # If the model check was performed longer ago than the check-rate
        # threshold, look for a newer model version in the store.
        if model_check_delta.total_seconds() * 1000 >= MODEL_STORE_CHECK_RATE:
            latest_id = model_reader.latestId()
            if model.version != latest_id:
                model = model_reader.read(version=latest_id)
                # Invalidate the cache, since we are using a new model.
                response_q.put('invalidate')
            last_model_check = current_time

        req = request_q.get()
        # The 'stop' sentinel terminates the processing loop.
        if req == 'stop':
            break
        # NOTE: the response is the request object itself, mutated in place
        # via .update() below.
        resp = req

        # Perform a top-k rating for the specified user prediction.
        if 'topk' in req:  # make rank predictions
            # Check that we have a valid user before asking for rankings.
            if model.valid_user(req['user']):
                recommendations = model.als.recommendProducts(int(req['user']),
                                                              int(req['topk']))
                # Each recommendation is indexed as (user, product, rating).
                resp.update(products=[
                    {'id': recommendation[1], 'rating': recommendation[2]}
                    for recommendation in recommendations
                ])
            else:
                _logger.error("Requesting rankings for invalid user id={}"
                              .format(req['user']))
                resp.update(products=[])
            response_q.put(resp)

        else:
            # Make rating predictions for each (user, product id) pair.
            items = sc.parallelize(
                [(req['user'], p['id']) for p in req['products']])
            predictions = model.als.predictAll(items).map(
                lambda x: (x[1], x[2])).collect()
            resp.update(products=[
                {'id': item[0], 'rating': item[1]}
                for item in predictions
            ])
            response_q.put(resp)
Esempio n. 8
0
    # NOTE(review): this is the tail of a function whose signature is not
    # visible in this chunk; `authors` arrives as a string such as '3',
    # '1,2,5' or '1:100'.
    if ':' in authors:
        # Range form 'start:end' — expand into the inclusive list of indices.
        start = int(authors.split(':')[0])
        end = int(authors.split(':')[1])
        authors = []
        while start <= end:
            authors.append(start)
            start += 1
        return authors
    try:
        # Comma-separated form — parse every piece as an int.
        return [int(author) for author in authors.split(',')]
    except:
        # Any parse failure yields None; the __main__ block below treats a
        # falsy result as "no authors given".
        return None


if __name__ == '__main__':
    # Parse the -a argument into a list of author indices.
    authors = _arg_authors(utils.get_arg('-a'))

    if not authors:
        # Usage message: fixed the 'indicies' typo and removed garbled
        # '<spam>...</spam>' text that had been injected into the string.
        msg = 'No author indices given.\n'
        msg += 'Usage:\n' \
               '     -a 1 (single author index)\n' \
               '     -a 1,2,5 (series of author indices)\n' \
               '     -a 1:100 (range of authors between indices)\n' \
               '     -fold expects a comma separated list of arff files\n' \
               '           with dot notation for their target class.\n' \
               '           example: result1.arff:1,result2.arff:1,result3.arff:2'
        sys.exit(msg)

    output = utils.OutputWriter()

    # Build training set or test set?
Esempio n. 9
0
import requests, utils
from http.cookies import SimpleCookie
from http import cookies

# Logging of requests (left disabled).
# logging.basicConfig(level=logging.DEBUG)

url = 'http://www.facebook.com/login.php?login_attempt=1'

# Abort early when the required login/password arguments are missing.
missing_arg = utils.check_required_arg(utils.login_opt, utils.password_opt)
if missing_arg is not None:
    utils.exit_program("Missing required argument " + missing_arg)



# Credentials come from the command line; build the login POST payload.
email = utils.get_arg(utils.login_opt)
password = utils.get_arg(utils.password_opt)
payload = {'email': email, 'pass' : password}

# Response counter — presumably incremented by code below this chunk; verify.
resp_counter = 0

def log_response(response):
    """Print a human-readable summary of an HTTP response.

    Shows the status code, the redirect target for non-200 responses,
    the headers, and (truncated below) the cookies.
    """
    print('Response:', response)
    indent = '\t'
    print(indent, 'Status:', response.status_code)
    if response.status_code != 200:
        # Assumes every non-200 response carries a 'location' header
        # (i.e. is a redirect) — raises KeyError otherwise; confirm.
        print(indent, 'Redirect to:', response.headers['location'])
#     print('    Content: ', response.text)
    print(indent, 'Headers: ', response.headers)
    if 'set-cookie' in response.headers:
        print(indent, 'Cookies:')
Esempio n. 10
0
def get_test_from_class(test_class):
    """Prepare a test set from the given CSV file under ``data/``.

    Each row is parsed as float64; the last column (the class label) is
    dropped from every row.

    :param test_class: file name of the CSV inside the ``data/`` directory.
    :return: list of per-row feature arrays (label column removed).
    """
    # `with` closes the file handle; the original leaked it.
    with open('data/' + test_class, 'r') as csv_file:
        test = genfromtxt(csv_file, delimiter=',', dtype='f8')
    return [x[:-1] for x in test]


def get_test_from_text(text):
    """Build a test set from the given text file via ``utils.OutputWriter``.

    :param text: path of the text file to convert.
    :return: whatever ``OutputWriter.write_from_textfile`` produces.
    """
    return utils.OutputWriter().write_from_textfile(text)

if __name__ == '__main__':
    # CLI arguments: a training CSV (with a default) plus exactly one of
    # a test CSV (-testcsv) or a free-text test file (-testtext).
    train_set = utils.get_arg('-traincsv', 'training_2-6.csv')
    test_class = utils.get_arg('-testcsv')
    test_text = utils.get_arg('-testtext')

    if not (test_text or test_class):
        # Fixed typo in the user-facing message: '-texttext' -> '-testtext'.
        sys.exit('Please provide either -testtext or -testcsv parameter.')

    rf = get_classifier(train_set)
    if test_class:
        test = get_test_from_class(test_class)
    else:
        test = get_test_from_text(test_text)

    data = rf.predict(test).tolist()

    result = {}
# NOTE(review): `diag` appears to be a NumPy array built above this chunk;
# column meanings are inferred from usage only — confirm against the code
# that produces it.
if utils.has_arg('--nosqrt'):
    # NOTE(review): despite the flag name, --nosqrt *applies* a square root
    # to columns 2 onward — confirm the intended semantics.
    diag[:, 2:] = diag[:, 2:] ** 0.5

# --nonums zeroes annotate_cnt (a count consumed by code below this chunk).
annotate_cnt = 4
if utils.has_arg('--nonums'):
    annotate_cnt = 0

# Determine the axis limit and whether the diagram should be cut.
no_cut = False
if utils.has_arg('--nocut'):
    no_cut = True
if utils.has_arg('--stdlim'):
    # Fixed "standard" limit.
    limit = 0.25
    no_cut = True
else:
    tmp = utils.get_arg('--lim')
    if tmp:
        # Explicit user-supplied limit.
        limit = float(tmp)
        no_cut = True
    else:
        # Derive the limit from the data, ignoring infinite values in col 3;
        # fall back to 1.0 when every col-3 entry is infinite.
        if diag[diag[:, 3] != Inf, 3].size == 0:
            limit = 1.
        else:
            limit = max(max(diag[:, 2]), max(diag[diag[:, 3] != Inf, 3]))
margin = limit * 0.01
# Clamp infinite col-3 entries just below the top margin so they stay finite.
diag[diag[:, 3] == Inf, 3] = limit + margin * 0.9

if no_cut:
    cut_x = cut_y = cut = [1., 0.]
else:
    srt = sort(diag[:, 2])
Esempio n. 12
0
@route('/article/<id:int>/edit')
@view('edit')
def edit(id):
    """Render the edit form for one article; redirect to login if not logged in."""
    login_status = request.get_cookie('login_status', False, secret=SECRET_KEY)
    if not login_status:
        redirect('/login')
    else:
        rows = c.execute('SELECT rowid, * FROM articles WHERE rowid == ?',
                         (id, )).fetchall()
        return dict(login_status=login_status, article=rows[0])


@post('/article/<id:int>/edit')
@view('message_page')
def do_edit(id):
    """Persist a submitted edit of article *id* and show a confirmation page."""
    new_title = request.forms.title
    new_content = request.forms.content
    # Timestamp without microseconds, prefixed by a Japanese
    # "last updated" label stored in the date column.
    timestamp = str(dt.datetime.now()).split('.')[0]
    c.execute(
        'UPDATE articles SET date = ?, title = ?, content = ? WHERE rowid == ?',
        ('最終更新日時:' + timestamp, new_title, new_content, id))
    conn.commit()
    return dict(login_status=True, error=False, message='記事を編集しました。')


if __name__ == '__main__':
    # Host and port come from positional CLI arguments, with local defaults.
    host = get_arg(1, 'localhost')
    port = int(get_arg(2, 5000))
    web.open(f'http://{host}:{port}')  # open the browser
    run(host=host, port=port, debug=True)