def get(self):
    """Report download progress for a job, including an estimated time
    remaining.

    Reads the job id from the ``id`` query-string parameter, fetches the
    job record and responds with the record plus an
    ``estimated_time_seconds`` field (0.0 when no progress data exists).
    Only the first matching job is reported.
    """
    db_init = DBConnection()
    job_id = request.args.get('id', None)
    search_query = {}
    if job_id:
        # BUG FIX: previously assigned the literal string "job_id"
        # instead of the actual id value from the query string.
        search_query["job_id"] = job_id
    try:
        result = db_init.get_job({'job_id': job_id})
        for item in result:
            updated_time = item["updated_time"]
            start_time = item["start_time"]
            downloaded_size = item["downloaded_size"]
            total_file_size = item["total_file_size"]
            estimated_time = 0
            if downloaded_size and total_file_size:
                # Linear extrapolation: elapsed time scaled by the ratio
                # of bytes remaining to bytes already downloaded.
                diff_sec = date_diff_in_s(updated_time, start_time)
                estimated_time = (
                    (float(total_file_size) - float(downloaded_size))
                    / float(downloaded_size)) * diff_sec
            temp = dict(item)
            temp["estimated_time_seconds"] = round(estimated_time, 2)
            return jsonify(temp)
    finally:
        # BUG FIX: close on every path — the original closed inside the
        # loop and leaked the connection when no job matched.
        db_init.close()
def get(self):
    """Serve the output file of the job identified by the ``id`` query
    parameter, with headers that prevent the browser from caching it."""
    db_init = DBConnection()
    job_id = request.args.get('id', None)
    search_query = {}
    search_query["job_id"] = job_id
    result = db_init.get_job(search_query)
    print(result)
    out_path = ""
    for record in result:
        out_path = record["output_path"]

    @after_this_request
    def disable_caching(resp):
        """Attach no-cache headers so every request refetches the file."""
        resp.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
        resp.headers["Pragma"] = "no-cache"
        resp.headers["Expires"] = "0"
        resp.headers['Cache-Control'] = 'public, max-age=0'
        return resp

    serve_dir = os.path.dirname(out_path)
    serve_name = os.path.basename(out_path)
    return send_from_directory(directory=serve_dir, filename=serve_name)
def download_file(job_id, url, filename, already_processed):
    """Stream ``url`` to disk, recording progress in the job database.

    Parameters
    ----------
    job_id : identifier of the job row to update.
    url : source URL to download.
    filename : destination path *without* extension; the extension is
        guessed from the response ``Content-Type``.
    already_processed : byte count already on disk (resume point);
        0 for a fresh download.

    The job's status is re-read between chunks so that a PAUSE/STOP
    request from another process aborts the transfer.
    """
    print(job_id, url, filename, "xxxxxxxxxxxxx")
    db_init = DBConnection()
    # Append when resuming; truncate for a fresh download.
    file_mode = 'wb' if already_processed == 0 else 'ab'
    response = requests.get(url, stream=True)
    total = response.headers.get('content-length')
    content_type = response.headers.get('Content-Type')
    file_extension = mimetypes.guess_extension(content_type)
    write_file_path = filename + file_extension
    is_break = False
    with open(write_file_path, file_mode) as f:
        if total is None:
            # No Content-Length: progress cannot be tracked, write all
            # at once.
            f.write(response.content)
        else:
            downloaded = 0
            total = int(total)
            # ~1000 progress updates per file, but at least 1 MiB chunks.
            for data in response.iter_content(
                    chunk_size=max(int(total / 1000), 1024 * 1024)):
                downloaded += len(data)
                update_query = {}
                update_query["job_id"] = job_id
                # Poll current status so PAUSE/STOP can interrupt.
                result = db_init.get_job({'job_id': job_id})
                if result:
                    status = result[0]['status']
                    if status in ["PAUSE", "STOP"]:
                        update_query["status"] = status
                        is_break = True
                if downloaded > already_processed:
                    # NOTE(review): on resume, the first chunk crossing
                    # the resume point is written whole, so up to one
                    # chunk of overlap may be appended — confirm intended.
                    already_processed = 0
                else:
                    # Still within the already-downloaded range: skip.
                    continue
                if is_break:
                    db_init.update_job(update_query)
                    break
                f.write(data)
                update_query["total_file_size"] = total
                update_query["downloaded_size"] = downloaded
                update_query["remaining_size"] = total - downloaded
                db_init.update_job(update_query)
    if not is_break:
        update_query = {}
        update_query["job_id"] = job_id
        update_query["end_time"] = datetime.utcnow()
        update_query["status"] = 'COMPLETED'
        update_query["command"] = "Finished Download"
        # BUG FIX: the completion record was built but never persisted,
        # so jobs were never marked COMPLETED.
        db_init.update_job(update_query)
    db_init.close()
class Worker(object):
    """RabbitMQ consumer that pulls download jobs off the ``urls`` queue
    and executes them via :func:`download_file`."""

    # Identifier used in log output; assigned externally.
    worker_id = ""

    def __init__(self,):
        """Connect to RabbitMQ and declare the work queue."""
        self.queue_name = 'urls'
        self.exchange_name = 'info'
        self.host = settings.RABBITMQ_HOST
        self.user = settings.RABBITMQ_USER
        self.password = settings.RABBITMQ_PASS
        self.credentials = pika.PlainCredentials(self.user, self.password)
        self.connection = pika.BlockingConnection(
            pika.ConnectionParameters(host=self.host, port=5672,
                                      credentials=self.credentials))
        self.channel = self.connection.channel()
        self.channel.queue_declare(queue=self.queue_name)

    def do_the_job(self, data):
        """Look up the job described by ``data`` (``job_id``/``status``)
        and run its download, resuming from the recorded byte offset
        when the status is RESUME."""
        job_id = data["job_id"]
        status = data["status"]
        self.db_init = DBConnection()
        search_query = {}
        search_query["job_id"] = job_id
        result = self.db_init.get_job({'job_id': job_id})
        if result:
            data = result[0]
            url = data["input_url"]
            already_processed = (data["downloaded_size"]
                                 if status == "RESUME" else 0)
            # FIXME(review): download_file() is declared as
            # (job_id, url, filename, already_processed) — passing the DB
            # connection first and omitting the filename does not match
            # that signature; confirm which definition is current before
            # changing the call.
            download_file(self.db_init, job_id, url, already_processed)
        self.db_init.close()

    def callback(self, ch, method, properties, body):
        """Consume one message: decode it, run the job, ack on success
        or on handled failure."""
        # BUG FIX: the original test `body is not None or body != ''`
        # was always true (`or` should have been `and`), so an empty
        # body crashed json.loads. Truthiness covers both cases.
        if body:
            data = json.loads(body.decode())
            if 'job_id' in data and 'status' in data:
                try:
                    self.do_the_job(data)
                except Exception as e:
                    # Best-effort: log the failure and still ack so the
                    # message is not redelivered forever.
                    print(str(e))
                ch.basic_ack(delivery_tag=method.delivery_tag)
                print("done with the job by worker - ", self.worker_id)