Example #1
0
    def get(self):
        """Return one job's record as JSON with an estimated remaining time.

        The job is selected by the ``id`` query-string argument and looked up
        with ``DBConnection.get_job``. Only the first row of the result is
        used. The response is that row plus ``estimated_time_seconds``,
        computed as::

            (total_file_size - downloaded_size) / downloaded_size * diff_sec

        where ``diff_sec`` is ``date_diff_in_s(updated_time, start_time)``.
        The estimate is rounded to two decimals and is 0 when either size is
        missing or zero.

        Returns:
            The ``jsonify`` response for the first matching row, or ``None``
            when the lookup yields no rows.
        """
        job_id = request.args.get('id', None)
        db_init = DBConnection()
        try:
            result = db_init.get_job({'job_id': job_id})
            for item in result:
                downloaded_size = item["downloaded_size"]
                total_file_size = item["total_file_size"]
                estimated_time = 0
                if downloaded_size and total_file_size:
                    downloaded = float(downloaded_size)
                    # A truthy value such as the string "0" still converts to
                    # 0.0; skip the estimate instead of dividing by zero.
                    if downloaded:
                        diff_sec = date_diff_in_s(item["updated_time"],
                                                  item["start_time"])
                        remaining = float(total_file_size) - downloaded
                        estimated_time = (remaining / downloaded) * diff_sec
                temp = dict(item)
                temp["estimated_time_seconds"] = round(estimated_time, 2)
                # Only the first row is reported.
                return jsonify(temp)
            # NOTE(review): no matching job falls through with None, exactly
            # as before; confirm whether a 404 response is wanted here.
            return None
        finally:
            # Release the connection on every path, including early return
            # and errors (previously it was closed only inside the size check).
            db_init.close()
Example #2
0
    def get(self):
        """Send the output file of the requested job.

        The job is selected by the ``id`` query-string argument. Its
        ``output_path`` is read from the rows returned by
        ``DBConnection.get_job``; if several rows come back the last one
        wins. The file is served with ``send_from_directory``, split into
        directory and base name.

        Returns:
            The response produced by ``send_from_directory``.
        """
        job_id = request.args.get('id', None)
        db_init = DBConnection()
        try:
            result = db_init.get_job({"job_id": job_id})
            out_path = ""
            for item in result:
                out_path = item["output_path"]
        finally:
            # The connection is only needed for the lookup above.
            db_init.close()

        @after_this_request
        def add_header(r):
            """Set caching headers so clients do not reuse a stale copy."""
            r.headers["Pragma"] = "no-cache"
            r.headers["Expires"] = "0"
            # This is the value that was effectively sent before: an earlier
            # "no-cache, no-store, must-revalidate" assignment to the same
            # header was immediately overwritten by this one.
            r.headers["Cache-Control"] = "public, max-age=0"
            return r

        # Positional arguments work on every Flask version; the ``filename``
        # keyword was renamed to ``path`` in newer releases.
        return send_from_directory(os.path.dirname(out_path),
                                   os.path.basename(out_path))
Example #3
0
def _extension_for(content_type):
    """Return the file extension (with leading dot) for a Content-Type, or ""."""
    if not content_type:
        return ""
    # Drop parameters such as "; charset=utf-8" before the lookup.
    mime_type = content_type.split(";", 1)[0].strip()
    return mimetypes.guess_extension(mime_type) or ""


def download_file(job_id, url, filename, already_processed):
    """Download ``url`` to disk while recording progress on the job row.

    The target path is ``filename`` plus an extension guessed from the
    response's Content-Type (no extension when it cannot be guessed).
    When the server reports a Content-Length the body is streamed in chunks
    and, per chunk, the job's ``total_file_size``, ``downloaded_size`` and
    ``remaining_size`` are updated. Before each chunk is written the job's
    ``status`` is re-read; ``PAUSE`` or ``STOP`` ends the download early.
    Without a Content-Length the whole body is written in one go.

    Args:
        job_id: Identifier of the job row to read and update.
        url: Address to download.
        filename: Target path without extension.
        already_processed: Number of bytes already present in the target
            file. 0 starts a fresh file; any other value appends, and the
            first ``already_processed`` bytes of the stream are skipped.

    Returns:
        None.
    """
    db_init = DBConnection()
    response = None
    is_break = False
    try:
        file_mode = 'wb' if already_processed == 0 else 'ab'
        response = requests.get(url, stream=True)
        total = response.headers.get('content-length')
        write_file_path = filename + _extension_for(
            response.headers.get('Content-Type'))

        with open(write_file_path, file_mode) as f:
            if total is None:
                f.write(response.content)
            else:
                downloaded = 0
                total = int(total)
                chunk_size = max(int(total / 1000), 1024 * 1024)

                for data in response.iter_content(chunk_size=chunk_size):
                    downloaded += len(data)
                    update_query = {"job_id": job_id}

                    # Poll the job row so a pause/stop request is honoured.
                    result = db_init.get_job({'job_id': job_id})
                    if result:
                        status = result[0]['status']
                        if status in ["PAUSE", "STOP"]:
                            update_query["status"] = status
                            is_break = True

                    # Resuming: skip data that is already in the file.
                    if downloaded <= already_processed:
                        continue

                    if is_break:
                        db_init.update_job(update_query)
                        break

                    # If this chunk straddles the resume point, write only
                    # the part that is not in the file yet.
                    overlap = already_processed - (downloaded - len(data))
                    if overlap > 0:
                        data = data[overlap:]
                    already_processed = 0

                    f.write(data)

                    update_query["total_file_size"] = total
                    update_query["downloaded_size"] = downloaded
                    update_query["remaining_size"] = total - downloaded
                    db_init.update_job(update_query)

        if not is_break:
            # Persist the completion state; it used to be built but never
            # written to the database.
            db_init.update_job({
                "job_id": job_id,
                "end_time": datetime.utcnow(),
                "status": 'COMPLETED',
                "command": "Finished Download",
            })
    finally:
        # Release the HTTP and DB connections even when the download fails.
        if response is not None:
            response.close()
        db_init.close()
Example #4
0
class Worker(object):
    """RabbitMQ consumer that runs download jobs announced on a queue.

    Messages are JSON objects carrying at least ``job_id`` and ``status``.
    """

    # Identifier printed after each handled message.
    worker_id = ""

    def do_the_job(self, data):
        """Look up the job named in ``data`` and start its download.

        Args:
            data: Mapping with ``job_id`` and ``status``. When ``status`` is
                ``"RESUME"`` the stored ``downloaded_size`` is passed to
                ``download_file``; otherwise 0 is passed.

        Raises:
            KeyError: If ``data`` lacks ``job_id`` or ``status``.
        """
        job_id = data["job_id"]
        status = data["status"]
        self.db_init = DBConnection()
        try:
            result = self.db_init.get_job({'job_id': job_id})
            if result:
                job = result[0]
                url = job["input_url"]
                if status == "RESUME":
                    already_processed = job["downloaded_size"]
                else:
                    already_processed = 0
                download_file(self.db_init, job_id, url, already_processed)
        finally:
            # Close the connection even when the download raises.
            self.db_init.close()

    def __init__(self):
        """Connect to RabbitMQ and declare the work queue.

        Host and credentials come from ``settings``; the port is fixed
        at 5672.
        """
        self.queue_name = 'urls'
        self.exchange_name = 'info'
        self.host = settings.RABBITMQ_HOST
        self.user = settings.RABBITMQ_USER
        self.password = settings.RABBITMQ_PASS

        self.credentials = pika.PlainCredentials(self.user, self.password)

        self.connection = pika.BlockingConnection(
            pika.ConnectionParameters(host=self.host, port=5672,
                                      credentials=self.credentials))

        self.channel = self.connection.channel()
        self.channel.queue_declare(queue=self.queue_name)

    def callback(self, ch, method, properties, body):
        """Handle one delivered message and acknowledge it.

        The body is decoded as JSON. When it is an object with ``job_id``
        and ``status`` the job is run through ``do_the_job``. Errors are
        printed and the message is acknowledged regardless, so an empty or
        malformed message is never left unacknowledged.

        Args:
            ch: Channel the message arrived on; used for ``basic_ack``.
            method: Delivery metadata providing ``delivery_tag``.
            properties: Message properties (unused).
            body: Raw message payload.
        """
        data = None
        if body:
            try:
                text = body.decode() if isinstance(body, bytes) else body
                data = json.loads(text)
            except ValueError as e:
                # Covers both invalid JSON and undecodable bytes.
                print(str(e))

        # Only JSON objects can carry the required keys; other payloads
        # (lists, numbers, ...) are acknowledged and ignored.
        if isinstance(data, dict) and 'job_id' in data and 'status' in data:
            try:
                self.do_the_job(data)
            except Exception as e:
                # Top-level boundary: a failing job must not kill the worker.
                print(str(e))

        ch.basic_ack(delivery_tag=method.delivery_tag)
        print("done with the job by worker - ", self.worker_id)