Code example #1
File: tasks.py  Project: lawandeneel/EXIFinator
# Imports assumed for this excerpt (not shown in the original snippet):
from boto.s3.connection import S3Connection
from boto.s3.bucket import Bucket
from mrq.context import setup_context
from mrq.job import queue_job

# AWS_ACCESS_KEY, AWS_SECRET_KEY and download_queue are defined elsewhere in the project.


def initialize_jobs(bucket_name):
    # Set up the MRQ context so jobs can be queued from outside a worker.
    setup_context()
    jobs_count = 0
    conn = S3Connection(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    bucket = Bucket(connection=conn, name=bucket_name)
    # Queue one Download task per object in the bucket.
    for key in bucket.list():
        queue_job("tasks.Download", {
            "bucket_name": bucket_name,
            "key_name": key.key
        },
                  queue=download_queue)
        jobs_count += 1
    return jobs_count
Code example #2
import unittest

from mrq.context import setup_context, queue_raw_jobs

setup_context()


class TestSetTask(unittest.TestCase):
    def setUp(self):
        """List of URLs as a payload for the set task. Duplicate values will not be added due to Redis Set datatype.
        """
        self.urls = [
            ['https://contentstudio.io'],
            ['https://d4interactive.io'],
            ['https://techcrunch.com'],
            ['https://mashable.com'],
            ['https://mashable.com']
        ]

        self.urls_key_value = [
            {
                'url': 'https://techcrunch.com'
            },
            {
                'url': 'https://mashable.com'
            },
            {
                'url': 'https://techcrunch.com'
            },
        ]
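
The excerpt stops before the test method that uses these fixtures. A minimal sketch of such a test follows, assuming the target queue is a set-backed raw queue; the method name, the queue name and the flattening of the fixture are illustrative and not taken from the original project:

    def test_set_queue_deduplicates_urls(self):
        # Hypothetical raw queue name; it has to be configured in MRQ as a
        # set-backed raw queue for deduplication to apply.
        queue_name = "urls_set"

        # queue_raw_jobs takes the raw queue name and a list of payloads;
        # flatten the nested fixture into plain URL strings first.
        payloads = [url for group in self.urls for url in group]
        queue_raw_jobs(queue_name, payloads)

        # With a Redis Set behind the queue, the duplicate
        # https://mashable.com payload is stored only once.

A worker whose raw-queue configuration turns payloads from this queue into jobs for the set task would then process each remaining URL once.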
Code example #3
File: app.py  Project: PUNTOZERO/imgfab
# Flask app setup: Bootstrap, MongoEngine, python-social-auth and flask-login
# are wired to the already-created `app` before the MRQ context is prepared.
Bootstrap(app)

app.config.update({
  "DEBUG": DEBUG
})

db = mongoengine.MongoEngine(app)
app.register_blueprint(social_auth)
init_social(app, db)
app.context_processor(backends)

login_manager = login.LoginManager()
login_manager.init_app(app)

# Set up the MRQ context only if it has not been configured already
# (e.g. when the app runs outside an MRQ worker).
if not get_current_config():
    setup_context()


@app.route("/data/facebook/albums")
@login.login_required
def data_facebook_albums():
    return json.dumps(g.user.get_facebook_albums())


@app.route("/create_job", methods=["POST"])
# @login.login_required
def create_job():
    taskpath = request.form['path']
    taskparams = json.loads(request.form['params'])

    if taskpath.startswith("admin"):
Code example #4
File: add_task.py  Project: ilyaglow/mrq-playground
from mrq import context
from mrq.job import queue_job

# Set up the MRQ context (config and connections) so jobs can be queued from this script.
context.setup_context()

# Queue a single Fetch job on the "crawl" queue; queue_job returns the new job's id.
result = queue_job("crawler.Fetch", {
    "url": "http://docs.python-requests.org",
    "from": "whatever.com"
},
                   queue="crawl")

print(result)
Code example #5
# argparse subcommand definition; the parser/subparsers, the "order" subcommand,
# json, BASEPATH and the mrq `job` / `setup_context` imports come from earlier in the file.
stock_parser = subparsers.add_parser('stock', help='Cancel Kasirga Job')
stock_parser.add_argument('--goods', dest='goods', action='store', required=True, help='JSON type parameters')
stock_parser.add_argument('--action', dest='action', action='store', required=False, default="increase",
                          help='string increase/decrease')


def order(arguments):
    prm = {
        "product": arguments.product,
        "quantity": arguments.quantity,
    }
    # queue_jobs expects a list of params dicts and returns the new job ids.
    all_ids = job.queue_jobs("tasks.stock.Stock", [prm], queue="low")
    return all_ids


def stock(arguments):
    goods = json.loads(arguments.goods)
    goods.update({"action": arguments.action})
    all_ids = job.queue_jobs("tasks.stock.Stock", [goods], queue="low")
    return all_ids


if __name__ == '__main__':
    args = parser.parse_args()
    if args.operation == 'order':
        # Load the MRQ config file before queueing jobs from this script.
        setup_context(file_path=BASEPATH + '/config/config.py', config_type='run')
        order(args)
    elif args.operation == 'stock':
        setup_context(file_path=BASEPATH + '/config/config.py', config_type='run')
        stock(args)
Code example #6
File: crawler.py  Project: ilyaglow/mrq-playground
    # run() method of the crawler's Fetch task: download a page, record it in
    # MongoDB and queue a new Fetch job for every same-domain link found.
    def run(self, params):

        context.setup_context()

        collection = connections.mongodb_jobs.simple_crawler_urls

        response = requests.get(params["url"])

        if response.status_code != 200:
            log.warning(
                "Got status %s on page %s (Queued from %s)" %
                (response.status_code, response.url, params.get("from")))
            return False

        # Store redirects
        if response.url != params["url"]:
            collection.update({"_id": params["url"]}, {
                "$set": {
                    "redirected_to": response.url,
                    "fetched_date": datetime.datetime.now()
                }
            })

        document = lxml.html.fromstring(response.content)

        document.make_links_absolute(response.url)

        queued_count = 0

        document_domain = urlparse.urlparse(response.url).netloc

        for (element, attribute, link, pos) in document.iterlinks():

            link = re.sub("#.*", "", link or "")

            if not link:
                continue

            domain = urlparse.urlparse(link).netloc

            # Don't follow external links for this example
            if domain != document_domain:
                continue

            # We don't want to re-queue URLs twice. If we try to insert a duplicate,
            # pymongo will throw an error
            try:
                collection.insert({"_id": link})
            except:
                continue

            queue_job("crawler.Fetch", {
                "url": link,
                "from": params["url"]
            },
                      queue="crawl")
            queued_count += 1

        stored_data = {
            "_id": response.url,
            "queued_urls": queued_count,
            "html_length": len(response.content),
            "fetched_date": datetime.datetime.now()
        }

        collection.update({"_id": response.url}, stored_data, upsert=True)

        return True