def testGenerateUrl(self):
    """generate_url should append the size suffix and a signature query string."""
    source = "http://example.com/media/foo.jpg"
    expected = (
        "http://example.com/media/foo_r200.jpg"
        "?d7a3f8c02c4ecb0c13aa024e1d80d1053ad1deec"
    )
    self.assertEqual(expected, utils.generate_url(source, "_r200"))
def fetch(category, ssl, page, search):
    """Scrape one torrent listing page and collect its rows.

    Args:
        category: category slug forwarded to generate_url().
        ssl: passed to requests.get as ``verify`` (certificate verification).
        page: page number forwarded to generate_url().
        search: when exactly ``True``, the URL is built in search mode.

    Returns:
        (decorative_combine, combine): zips of
        (sno, name, size, seeds, leechers) and the same plus the magnet link.
    """
    # Collapse the redundant if/else; the original used a strict
    # ``search == True`` test, so only the exact value True selects
    # search mode (truthy non-True values do not) -- preserved here.
    url = generate_url(category, page, flag=(search == True))

    source_code = requests.get(url, verify=ssl)
    plain_text = source_code.text.encode('utf-8')
    soup = BeautifulSoup(plain_text, "lxml")

    torrent_name = []
    torrent_seeds = []
    torrent_size = []
    torrent_leechers = []
    torrent_magnet = []  # local rename: was misspelled "torrent_megent"

    for table in soup.findAll('table', {'class': 'data'}):
        for cell in table('a', {'class': 'cellMainLink'}):
            torrent_name.append(cell.get_text())
        for cell in table('td', {'class': 'nobr center'}):
            torrent_size.append(cell.get_text())
        for cell in table('td', {'class': 'green center'}):
            torrent_seeds.append(cell.get_text())
        for cell in table('td', {'class': 'red lasttd center'}):
            torrent_leechers.append(cell.get_text())
        # NOTE(review): 'data-sc-paramas' looks like a typo for
        # 'data-sc-params', but it is a live HTML selector -- left
        # unchanged; confirm against the site's actual markup.
        for cell in table('div', {'data-sc-paramas': 'magnet'}):
            torrent_magnet.append(cell.get_text())
        for cell in table('div', {'class': 'none'}):
            params = ast.literal_eval(cell.get('data-sc-params'))
            torrent_magnet.append(params['magnet'])

    # Serial numbers 1..25 repeated 8 times (assumes 25 rows across 8
    # result tables per page -- TODO confirm against the site layout).
    sno = [j + 1 for _ in xrange_(8) for j in xrange_(25)]

    decorative_combine = zip(sno, torrent_name, torrent_size,
                             torrent_seeds, torrent_leechers)
    combine = zip(sno, torrent_name, torrent_size, torrent_seeds,
                  torrent_leechers, torrent_magnet)
    return decorative_combine, combine
def folder(folder_name):
    """Single-folder endpoint: upload a file (POST), list files (GET),
    or delete the folder (DELETE). Returns JSON responses throughout."""
    try:
        folder_row = Folder.get(name=folder_name)
    except peewee.DoesNotExist:
        return jsonify(message='error'), 404

    if request.method == 'POST':
        uploaded = request.files['file']
        if uploaded:
            actual_filename = secure_filename(
                folder_name + '_' + uploaded.filename)
            target_path = os.path.join(
                app.config['UPLOAD_FOLDER'], actual_filename)
            # Refuse to overwrite an existing upload.
            if os.path.exists(target_path):
                return jsonify(message='error'), 409
            uploaded.save(target_path)
            record = File.create(
                folder=folder_name,
                filename=uploaded.filename,
                public_share_url=generate_url(),
                private_share_url=generate_url(),
                private_share_password=generate_password(),
                open_public_share=False,
                open_private_share=False)
            record.save()
            return jsonify(message='OK'), 201
        # NOTE: with a falsy file part, control falls through and the
        # function implicitly returns None (original behavior preserved).

    if request.method == 'GET':
        rows = File.select().where(File.folder == folder_name)
        items = []
        for row in rows:
            items.append({
                'filename': row.filename,
                'public': row.public_share_url,
                'private': row.private_share_url,
                'password': row.private_share_password,
                'openPublic': row.open_public_share,
                'openPrivate': row.open_private_share,
            })
        return jsonify(message='OK', items=items)

    if request.method == 'DELETE':
        try:
            folder_row.delete_instance()
        except peewee.IntegrityError:
            # Folder still referenced (e.g. contains files).
            return jsonify(message='error'), 409
        return jsonify(message='OK')
def rdrand(username, hdfs, input_directory, output_directory, **kwargs):
    """Run a duplicate-key-counting job over rdrand fragments, then a
    follow-up job that merges its outputs into one readable file.

    Returns the result of utils.run_in_sequence over the two job configs.
    """
    if output_directory is None:
        output_directory = "%s/outputs" % (username)

    rdrand_out_dir = "%s/rdrand" % (output_directory)
    merged_out_dir = "%s/final_output" % (output_directory)

    input_url, rdrand_out_url = utils.generate_urls(
        input_directory, rdrand_out_dir, hdfs)
    merged_output_url = utils.generate_url(merged_out_dir, hdfs)

    rdrand_config = utils.mapreduce_job(
        input_dir=input_url,
        output_dir=rdrand_out_url,
        map_function="PassThroughMapFunction",
        reduce_function="CountDuplicateKeysReduceFunction",
        partition_function="UniformPartitionFunction")

    # Tuning overrides merged into the job config's "params" section.
    overrides = {
        "SKIP_PHASE_ZERO": 1,                  # Don't sample...
        "INTERMEDIATE_TO_INPUT_RATIO": 3.0,    # ...assume ratio of 3 instead
        "MAP_INPUT_FORMAT_READER": "RdRandFormatReader",  # 64-bit fragments
        "REDUCE_INPUT_FORMAT_READER": "FixedSizeKVPairFormatReader",  # no header
        "REDUCE_INPUT_FIXED_KEY_LENGTH": 16,   # 128-bit intermediate keys...
        "REDUCE_INPUT_FIXED_VALUE_LENGTH": 0,  # ...with empty values
        "WRITE_WITHOUT_HEADERS.phase_one": 1,  # no headers
    }
    rdrand_config.setdefault("params", {}).update(overrides)

    # Second job: merge all duplicate key information into a single
    # output file for better readability.
    mergefiles_config = merge_files(rdrand_out_dir, merged_out_dir, hdfs)
    return utils.run_in_sequence(rdrand_config, mergefiles_config)
def post(self):
    """Create a share entry for one of the current user's file nodes.

    Body args: ``id`` (file node id), ``token_required`` (1 to also
    generate an access token), ``day`` (share lifetime in days,
    default 1095). Returns a Flask JSON response with a code/message
    payload, plus the share record on success.
    """
    parse = reqparse.RequestParser()
    # NOTE(review): defaults are the string '0' for type=int arguments;
    # reqparse appears to rely on coercion here -- verify intended.
    parse.add_argument('id', type=int, help='错误的id', default='0')
    parse.add_argument('token_required', type=int, default='0')
    parse.add_argument('day', type=int, default=1095)
    args = parse.parse_args()
    file_id = args.get('id')
    token_required = args.get('token_required')
    day = args.get('day')
    # Only nodes owned by the current user can be shared.
    fileobj = FileNode.query.filter_by(id=file_id,
                                       user_id=current_user.uid).first()
    if fileobj is None:
        response = make_response(
            jsonify(code=11, message='node not exist, query fail'))
        return response
    # Flip the node's share flag; it is persisted by the commit below.
    if fileobj.is_share == False:
        fileobj.is_share = True
    share_url = generate_url()
    # Optional access token guarding the share link.
    if token_required == 1:
        share_token = generate_share_token()
    else:
        share_token = ''
    shareobj = ShareTable(
        file_id=file_id,
        share_url=share_url,
        share_token=share_token,
        share_begin_time=int(time.time()),
        # Expiry is `day` days after creation.
        share_end_time=int(time.time()) + day * 24 * 3600)
    try:
        db.session.add(shareobj)
        db.session.commit()
        response = make_response(
            jsonify(code=0, message='OK',
                    data={'share': shareobj.to_json()}))
        return response
    except Exception as e:
        # app.logger.exception(e)
        # Presumably a uniqueness violation on the share row; the session
        # is not explicitly rolled back here -- TODO confirm upstream
        # handling.
        response = make_response(
            jsonify(code=12, message='node already exist , add fail'))
        return response
def rdrand(username, input_directory, output_directory, **kwargs):
    """Count duplicate rdrand fragments via a map-reduce job, then merge
    the per-partition outputs into a single final file.

    Returns the result of utils.run_in_sequence over both job configs.
    """
    if output_directory is None:
        output_directory = "%s/outputs" % (username)

    dirs = {
        "rdrand": "%s/rdrand" % (output_directory),
        "merged": "%s/final_output" % (output_directory),
    }

    input_url, rdrand_output_url = utils.generate_urls(
        input_directory, dirs["rdrand"])
    merged_output_url = utils.generate_url(dirs["merged"])

    job = utils.mapreduce_job(
        input_dir=input_url,
        output_dir=rdrand_output_url,
        map_function="PassThroughMapFunction",
        reduce_function="CountDuplicateKeysReduceFunction",
        partition_function="UniformPartitionFunction")

    if "params" not in job:
        job["params"] = {}
    job["params"].update({
        "SKIP_PHASE_ZERO": 1,                  # Don't sample...
        "INTERMEDIATE_TO_INPUT_RATIO": 3.0,    # ...assume a ratio of 3
        "MAP_INPUT_FORMAT_READER": "RdRandFormatReader",  # 64-bit fragments
        "REDUCE_INPUT_FORMAT_READER": "FixedSizeKVPairFormatReader",  # no header
        "REDUCE_INPUT_FIXED_KEY_LENGTH": 16,   # 128-bit intermediate keys...
        "REDUCE_INPUT_FIXED_VALUE_LENGTH": 0,  # ...with empty values
        "WRITE_WITHOUT_HEADERS.phase_one": 1,  # no headers
    })

    # Run a second job to merge all duplicate key information into a
    # single output file for better readability.
    merge_job = merge_files(dirs["rdrand"], dirs["merged"])
    return utils.run_in_sequence(job, merge_job)
def crawl_by_category(cateName):
    """Crawl successive day-pages for *cateName*, extracting <h3> article
    headings, and return a list of article-info dicts tagged with the
    category."""
    collected = []
    day = 1
    # Hoisted out of the loop; compiled once, identical matches.
    heading_pattern = re.compile('(?=<h3).*?(?<=</h3>)', flags=re.I | re.S)
    while True:
        page_html = make_request(generate_url(cateName, day))
        headings = heading_pattern.findall(page_html)
        # NOTE(review): the cut-off fires *after* fetching day 30, so any
        # headings found on day 30 itself are discarded -- confirm this
        # is intended.
        if day >= 30:
            break
        if not headings:
            day += 1
            continue
        for heading in headings:
            info = get_info(heading)
            info["category"] = cateName
            collected.append(info)
        day += 1
    return collected
def test_generate_url(self):
    """generate_url appends the resize suffix and the signature query."""
    original = "http://example.com/media/foo.jpg"
    expected = (
        "http://example.com/media/foo_r200.jpg"
        "?fbcc428ecfa2b8a1a579a11009ffe4f164881249"
    )
    self.assertEqual(expected, utils.generate_url(original, "_r200"))
import logging
import inspect
from binascii import hexlify
from os import urandom

import requests as req
from flask import Flask, request, jsonify, render_template, url_for, redirect
from flask_cors import CORS

__author__ = "Daniel Pérez"
__email__ = "*****@*****.**"

app = Flask(__name__,
            static_folder='browser/static',
            template_folder='browser/templates')
CORS(app)

# NOTE(review): generate_url is neither imported nor defined in this view
# of the file -- unless it is provided elsewhere, this line raises
# NameError at import time. Also a module-level network call: a failed
# request prevents the module from importing at all.
MY_IP = req.get(generate_url('jsonip.com')).json()['ip']
API_IP = MY_IP
PORT = 80


def run():
    """Start the Flask app on 0.0.0.0:PORT with a random secret key."""
    flask_options = dict(port=PORT, host='0.0.0.0')
    # Fix: hexlify and urandom were used here but never imported in the
    # original, which raised NameError on the first call to run().
    app.secret_key = hexlify(urandom(24))
    app.run(**flask_options)


@app.route('/')
def root():
    return redirect(url_for('index'))