def mass_upload():
    """Render the mass-upload page and enqueue a submitted batch.

    When the form validates, the ``(ids, email, language)`` triple from the
    form is pushed as JSON onto the queue named by ``keys.redis_key4`` and a
    confirmation is flashed. A non-empty email for which ``validate_email``
    does not return ``True`` re-renders the page with ``_errorNo=5``; a form
    that does not validate is rendered with ``_errorNo=0``.
    """
    page_title = "Upload several books to Internet Archive"
    upload_form = MassUpload(request.form)

    if not upload_form.validate():
        return render_template('mass_upload.html',
                               subtitle=page_title,
                               form=upload_form,
                               _errorNo=0)

    # The email is optional, but when given it has to pass validation.
    if upload_form.email.data:
        if validate_email(upload_form.email.data) != True:
            flash(error_msg(5))
            session['timestamp'] = datetime.now()
            return render_template('mass_upload.html',
                                   subtitle=page_title,
                                   form=upload_form,
                                   _errorNo=5)

    client = redis_py.Redis()
    batch_queue = redis_py.Queue(keys.redis_key4)
    payload = (upload_form.ids.data,
               upload_form.email.data,
               upload_form.language.data)
    batch_queue.add(json.dumps(payload))

    session['timestamp'] = datetime.now()
    flash(error_msg(100, email=upload_form.email.data))
    return render_template('mass_upload.html',
                           subtitle=page_title,
                           form=upload_form)
def index():
    """Handle the single-book upload form on the landing page.

    GET requests and invalid submissions render ``index.html``. A valid POST
    records the submitted book id under the book's ``:library_url`` key,
    verifies the request, and on success answers with the minified
    ``confirm.html`` page plus a ``bub_session`` cookie whose value is also
    handed to ``store_request``.
    """
    subtitle = "Transfer books to Internet Archive"
    form = UploadForm(request.form)

    def show_form():
        # Every non-confirming outcome renders the same template.
        return render_template("index.html",
                               subtitle=subtitle,
                               libraries=libraries,
                               form=form)

    if request.method != 'POST' or not form.validate():
        return show_form()

    library_id = form.library_id.data
    book_id = html_escape(form.book_id.data)
    if form.email.data not in (None, ""):
        email = html_escape(form.email.data)
    else:
        email = ""

    book = bridge.fields(library_id, book_id, email)
    # Prefer the id reported by the bridge when it provides one.
    if book.Id not in (None, ""):
        book_id = book.Id

    client = redis_py.Redis()
    library_url_key = "%s:%s:%s:library_url" % (keys.redis_key3, library_id,
                                                book_id)
    redis_py.set(library_url_key, form.book_id.data, True)
    client.expire(library_url_key, 60 * 15)

    status_no = book.verify_fields()
    if status_no != 0:
        flash(error_msg(status_no, book))
        return show_form()

    if redundant_request(book) == True:
        flash(error_msg(50))
        return show_form()

    book_info = bridge.book_info(library_id, book_id)
    # A numeric result is passed to error_msg as an error number.
    if isinstance(book_info, (int, long, float, complex)):
        flash(error_msg(book_info))
        return show_form()

    confirm_page = render_template("confirm.html",
                                   book_info=book_info,
                                   form=form)
    confirm_resp = make_response(minify(confirm_page))
    session_token = urandom(32).encode("hex")
    confirm_resp.set_cookie('bub_session', session_token)
    store_request(book, session_token)
    return confirm_resp
def store_book_metadata(library_id, book_id, sno):
    """Cache a book's metadata and ``sno`` in Redis.

    The metadata comes from ``bridge.book_info``; both values are stored
    JSON-encoded under ``<redis_key3>:<library_id>:<book_id>:meta_data`` and
    ``...:sno``. The book's ``:library_url`` key gets its expiry set to
    ``60 * 60 * 60``.
    """
    client = redis_py.Redis()
    metadata = bridge.book_info(library_id, book_id)
    key_prefix = "%s:%s:%s:" % (keys.redis_key3, library_id, book_id)

    for suffix, value in (("meta_data", metadata), ("sno", sno)):
        redis_py.set(key_prefix + suffix, json.dumps(value), True)

    client.expire(key_prefix + "library_url", 60 * 60 * 60)
def reupload(book_id, email="", key=""):
    """Re-queue a book for upload to Internet Archive.

    :param book_id: ``"<library>:<id>"``; split at the last ``":"``.
    :param email: address the ``key`` was issued for (optional).
    :param key: expected to equal the MD5 hex digest of ``email`` followed by
        ``keys.flask_app_secret``. A non-empty key that matches lets the
        request through without a valid form submission.

    The form is re-rendered when a key is given without an email, when the
    key does not match, or when neither a valid form nor a key is present.
    A ``book_id`` that contains no ``":"`` yields a 404 instead of an
    unhandled ``AttributeError``. On success the user is redirected to the
    book's progress page.
    """
    subtitle = "Reupload book to Internet Archive"
    form = ReUpload()
    key_given = key not in (None, "")

    def show_form():
        return render_template('reupload.html', subtitle=subtitle, form=form)

    if key_given:
        if email in (None, ""):
            return show_form()
        expected_key = hashlib.md5(
            str(email) + str(keys.flask_app_secret)).hexdigest()
        if key != expected_key:
            return show_form()

    if not (form.validate() or key_given):
        return show_form()

    book_values = re.search("(.*):(.*)", book_id)
    if book_values is None:
        # Malformed id: there is no library/identifier pair to work with.
        abort(404)
    library = book_values.group(1)
    Id = book_values.group(2)

    ia_identifier_suffix = get_valid_identifier_suffix(library, Id)
    reset_book_progress(library, ia_identifier_suffix)

    # NOTE(review): this client is not used below; kept in case constructing
    # it has side effects -- confirm and remove.
    redis = redis_py.Redis()
    redis_key3 = keys.redis_key3
    # Metadata is read from the key of the ORIGINAL book id and copied to the
    # key built from the (possibly different) valid identifier suffix.
    book_metadata = redis_py.get(redis_key3 + ":" + book_id + ":meta_data",
                                 True)
    book_key = "%s:%s:%s" % (redis_key3, library, ia_identifier_suffix)
    redis_py.set(book_key + ":meta_data", book_metadata, True)

    # NOTE(review): the address comes from the form even on the key-based
    # path, where the ``email`` argument may be the intended value -- confirm.
    request_details = dict(email=form.email.data)
    redis_py.sadd(book_key + ":requests",
                  json.dumps(request_details),
                  request_cache=True)

    book = models.Book(book_id=ia_identifier_suffix,
                       library=library,
                       requests=json.dumps(request_details),
                       meta_data=book_metadata)
    db.session.add(book)
    db.session.commit()

    redis_key1 = keys.redis_key1
    q_global_job = redis_py.Queue(redis_key1 + "global")
    q_global_job.add(
        json.dumps(dict(library=library, book_id=ia_identifier_suffix)))
    q = redis_py.Queue(redis_key1)
    q.add(
        json.dumps(
            dict(library=library,
                 Id=Id,
                 ia_identifier_suffix=ia_identifier_suffix)))

    flash(error_msg(100, email=form.email.data))
    return redirect(
        url_for('progress', book_id=library + ":" + ia_identifier_suffix))
def reset_book_progress(library, ia_identifier):
    """Clear a book's cached progress markers and its finished ``book`` rows.

    Deletes the ``download_progress``, ``upload_progress``, ``ia_response``,
    ``OCR_progress`` and ``email_progress`` keys under
    ``<redis_key3>:<library>:<ia_identifier>``, then removes the rows of table
    ``book`` for this library/book whose ``email_progress`` is 1.
    """
    client = redis_py.Redis()
    key_prefix = "%s:%s:%s:" % (keys.redis_key3, library, ia_identifier)

    markers = (
        "download_progress",
        "upload_progress",
        "ia_response",
        "OCR_progress",
        "email_progress",
    )
    for marker in markers:
        client.delete(key_prefix + marker)

    d.execute(
        "delete from book where email_progress=1 and library=%s and book_id=%s;",
        library, ia_identifier)
def submit_job(stored_request):
    """Add book-request to the job queues.

    :param stored_request: request record; its ``library``, ``book_id``,
        ``sno``, ``email`` and ``md5_book`` attributes are read and
        ``job_submitted`` is set to 1 when a job is queued.

    The request is always added to the book's ``:requests`` set. A job is
    queued (and a ``models.Book`` row created) only when no confirmed,
    already-submitted ``models.Request`` with the same ``md5_book`` exists.

    When the book's ``:requests`` key already exists, the lock named by
    ``keys.lock_key1`` is acquired first. It is released in a ``finally``
    clause so an exception raised while queueing cannot leave it held.
    """
    redis_key1 = keys.redis_key1
    q = redis_py.Queue(redis_key1)
    q_global_job = redis_py.Queue(redis_key1 + "global")
    redis = redis_py.Redis()
    book_key = "%s:%s:%s" % (keys.redis_key3, stored_request.library,
                             stored_request.book_id)
    book_request_key = book_key + ":requests"

    lock = redis_py.Lock(keys.lock_key1)
    locked = False
    if redis.exists(book_request_key):
        locked = lock.acquire(timeout=60)

    try:
        request_details = dict(sno=stored_request.sno,
                               email=stored_request.email)
        redis_py.sadd(book_request_key,
                      json.dumps(request_details),
                      request_cache=True)

        redundant_request = models.Request.query.filter_by(
            md5_book=stored_request.md5_book, confirmed=1,
            job_submitted=1).first()
        if redundant_request is None:
            reset_book_progress(stored_request.library,
                                stored_request.book_id)
            library_url = redis_py.get(book_key + ":library_url", True)
            meta_data = redis_py.get(book_key + ":meta_data", True)
            book = models.Book(book_id=stored_request.book_id,
                               library=stored_request.library,
                               md5_book=stored_request.md5_book,
                               connected_request_sno=stored_request.sno,
                               requests=json.dumps(request_details),
                               library_url=library_url,
                               meta_data=meta_data)
            db.session.add(book)
            q.add(stored_request.sno)
            # The new Book row and the job_submitted flag are committed
            # together.
            stored_request.job_submitted = 1
            db.session.commit()
            q_global_job.add(
                json.dumps(
                    dict(library=stored_request.library,
                         book_id=stored_request.book_id)))
    finally:
        if locked:
            lock.release()
def queue(number_of_entries=100):
    """Render the queue page for web-based upload jobs.

    Takes up to ``number_of_entries`` items from the global job queue via its
    ``pop`` method, decodes each one from JSON and marks it ``OCR_waiting``
    when its upload progress is ``'1'`` but its OCR progress is not. If any
    item is not OCR-waiting, one of them is reported as currently running and
    excluded from the waiting-to-run total.
    """
    subtitle = "Queue for web-based upload jobs"
    redis_key1 = keys.redis_key1
    redis_key3 = keys.redis_key3
    global_jobs = redis_py.Queue(redis_key1 + "global")
    client = redis_py.Redis()
    entry_limit = int(number_of_entries)
    entries = global_jobs.pop(entry_limit)

    ocr_waiting = 0
    waiting_to_run = 0
    running = 0
    present_id = client.get(redis_key3 + ":ongoing_job_identifier")

    if entries:
        for position, raw_entry in enumerate(entries):
            job = json.loads(raw_entry)
            entries[position] = job
            uploaded = redis_py.get(
                "%s:%s:%s:upload_progress" %
                (redis_key3, job['library'], job['book_id']), True)
            ocr_done = redis_py.get(
                "%s:%s:%s:OCR_progress" %
                (redis_key3, job['library'], job['book_id']), True)
            if uploaded == '1' and ocr_done != '1':
                entries[position].update(OCR_waiting=1)
                ocr_waiting += 1
            else:
                waiting_to_run += 1

        # Each entry lands in exactly one bucket, so "not every entry is
        # OCR-waiting" is the same as "waiting_to_run is non-zero".
        if waiting_to_run != 0:
            waiting_to_run -= 1
            running = 1
        else:
            running = 0

    return render_template("queue.html",
                           subtitle=subtitle,
                           total_OCR_waiting=ocr_waiting,
                           total_waiting_to_run=waiting_to_run,
                           queue=entries,
                           auto_refresh=30,
                           present_id=present_id,
                           number_of_entries=entry_limit,
                           currently_running=running)
def progress(book_id):
    """Render the progress page for one book.

    :param book_id: colon-separated id. The piece before the first ``":"`` is
        used as the library and the piece after the last ``":"`` as the
        identifier suffix when looking the book up in the global job queue.

    Responds with 404 when no metadata is cached for the book. The page
    subtitle is the book title, cut to 65 characters plus ``".."`` when it is
    longer than 70; a missing or empty title gives an empty subtitle instead
    of an ``UnboundLocalError``.
    """
    # NOTE(review): this client is not used below; kept in case constructing
    # it has side effects -- confirm and remove.
    redis = redis_py.Redis()
    q_global_job = redis_py.Queue(keys.redis_key1 + "global")
    book_key = "%s:%s" % (keys.redis_key3, book_id)
    id_parts = book_id.split(':')
    library = id_parts[0]
    ia_identifier_suffix = id_parts[-1]

    def cached_int(suffix, wanted=True):
        """Read ``book_key + suffix`` as an int; None when empty or skipped."""
        raw = redis_py.get(book_key + suffix, True) if wanted else None
        return int(raw) if raw else None

    metadata = redis_py.get(book_key + ":meta_data", True)
    if metadata is None:
        abort(404)
    metadata = json.loads(metadata)

    title = metadata.get('title') or ""
    subtitle = title[:65] + ".." if len(title) > 70 else title

    status = dict()
    ia_response = cached_int(":ia_response")
    status.update(ia_response=ia_response)

    # Download, upload and OCR progress are only read for responses 0 and 3.
    track_stages = ia_response == 0 or ia_response == 3
    download_progress = cached_int(":download_progress", track_stages)
    status.update(download_progress=download_progress)
    upload_progress = cached_int(":upload_progress", track_stages)
    status.update(upload_progress=upload_progress)

    # The identifier may be stored JSON-encoded or as a plain string. When it
    # is absent, no ia_link/ia_identifier entries are added at all.
    raw_identifier = redis_py.get(book_key + ":ia_identifier", True)
    if raw_identifier is not None:
        try:
            ia_identifier = json.loads(raw_identifier)
        except (TypeError, ValueError):
            ia_identifier = raw_identifier
        if ia_identifier:
            ia_link = "http://archive.org/details/%s" % ia_identifier
        else:
            ia_link = ""
        status.update(ia_link=ia_link, ia_identifier=ia_identifier)

    OCR_progress = cached_int(":OCR_progress", track_stages)
    status.update(OCR_progress=OCR_progress)
    status.update(email_progress=cached_int(":email_progress"))

    # Built with the same dict(...) call as the global-queue entries so the
    # serialized form can match when searching the queue.
    global_queue_key = json.dumps(
        dict(library=library, book_id=ia_identifier_suffix))
    status.update(queue_index=q_global_job.index(global_queue_key))
    status.update(percent_complete=percent_complete(
        ia_response, download_progress, upload_progress, OCR_progress))

    auto_refresh = auto_refresh_time(ia_response)
    return render_template("progress.html",
                           subtitle=subtitle,
                           book_info=metadata,
                           progress=status,
                           auto_refresh=auto_refresh,
                           request=request,
                           book_id=book_id)
def manual():
    """Handle the manual upload form (wildcard URL or direct PDF link).

    GET requests, invalid submissions, submissions with neither URL filled
    in, and submissions whose ``link_type`` is neither ``'wildcard'`` nor
    ``'pdf'`` render ``wildcard.html``. Otherwise the book is registered
    under library ``'man'``: its metadata and request are cached in Redis, a
    ``models.Book`` row is committed, the job is added to both queues and the
    user is redirected to the progress page.
    """
    subtitle = "Upload using wildcards"

    def or_empty(value):
        """Return ``value`` unless it is ``None`` or ``""`` (then ``""``)."""
        return value if value not in (None, "") else ""

    def show_form():
        return render_template("wildcard.html", subtitle=subtitle, form=form)

    # NOTE(review): the UnboundLocalError guard looks like a leftover from a
    # version where a local shadowed Flask's ``request`` -- confirm before
    # removing.
    try:
        form = WildcardForm(request.form)
    except UnboundLocalError:
        form = WildcardForm()
        return show_form()

    if request.method != 'POST' or not form.validate():
        return show_form()
    if form.book_url.data in (None, "") and form.book_pdf_url.data in (None,
                                                                       ""):
        return show_form()

    link_type = form.link_type.data
    if link_type == 'wildcard':
        book_url = or_empty(form.book_url.data)
        from_no = html_escape(
            form.from_no.data) if form.from_no.data not in (None, "") else ""
        to_no = html_escape(
            form.to_no.data) if form.to_no.data not in (None, "") else ""
        # The info link is the pattern with "(*)" replaced by the first
        # number. A literal replace keeps backslashes in from_no from being
        # treated as regex replacement escapes.
        info_url = book_url.replace("(*)", str(from_no))
        book_url = book_url + ":" + str(from_no) + "," + str(to_no)
        book_url = book_url + ":" + 'wildcard'
    elif link_type == 'pdf':
        book_url = or_empty(form.book_pdf_url.data)
        info_url = book_url
        book_url = book_url + ":" + 'pdf'
    else:
        # Unknown link type: without this branch book_url/info_url would be
        # unbound below.
        return show_form()

    library = 'man'
    Id = hashlib.md5(book_url).hexdigest()
    email = html_escape(
        form.email.data) if form.email.data not in (None, "") else ""

    book = bridge.fields(library, book_url, email)
    verification_status_no = book.verify_fields()
    if verification_status_no != 0:
        flash(error_msg(verification_status_no, book))
        return show_form()

    tld = extract_base_domain(book_url)
    book_metadata = dict(image_url="",
                         thumbnail_url="",
                         printType="BOOK",
                         subtitle="",
                         infoLink=info_url,
                         publicDomain=True,
                         scanner=tld,
                         sponser=tld,
                         title=or_empty(form.title.data),
                         author=or_empty(form.author.data),
                         publisher=or_empty(form.publisher.data),
                         publishedDate=or_empty(form.date.data),
                         description=or_empty(form.desc.data),
                         language=or_empty(form.language.data))
    meta_data = json.dumps(book_metadata)

    ia_identifier_suffix = get_valid_identifier_suffix(library, Id)
    # NOTE(review): this client is not used below; kept in case constructing
    # it has side effects -- confirm and remove.
    redis = redis_py.Redis()
    redis_key3 = keys.redis_key3
    redis_key1 = keys.redis_key1
    book_key = "%s:%s:%s" % (redis_key3, library, ia_identifier_suffix)
    redis_py.set(book_key + ":meta_data", meta_data, True)

    request_details = dict(email=email)
    redis_py.sadd(book_key + ":requests",
                  json.dumps(request_details),
                  request_cache=True)

    md5_book = hashlib.md5(ia_identifier_suffix + library).hexdigest()
    book = models.Book(book_id=ia_identifier_suffix,
                       library=library,
                       requests=json.dumps(request_details),
                       meta_data=meta_data,
                       md5_book=md5_book)
    db.session.add(book)
    db.session.commit()

    q_global_job = redis_py.Queue(redis_key1 + "global")
    q_global_job.add(
        json.dumps(dict(library=library, book_id=ia_identifier_suffix)))
    q = redis_py.Queue(redis_key1)
    q.add(
        json.dumps(
            dict(library=library,
                 Id=book_url,
                 ia_identifier_suffix=ia_identifier_suffix)))

    flash(error_msg(100, email=email))
    return redirect(
        url_for('progress', book_id=library + ":" + ia_identifier_suffix))