def refine_poetry(crawled_poetry, url, language):
    """Fix the line order of a crawled poem using the OCR text of its image.

    The image located via ``get_poetry_img_url(url, language)`` is downloaded,
    converted to text, and then removed from storage. The non-empty lines of
    the OCR text and of the plain-text version of ``crawled_poetry`` are
    handed to ``correct_poetry_lines_order``; its result is joined with
    newlines and returned.

    Args:
        crawled_poetry: Crawled poem markup; it is passed through
            ``html_to_plain_text`` before being split into lines.
        url: Passed to ``get_poetry_img_url`` to locate the poem image.
        language: Passed to both the image-URL lookup and the OCR helper.

    Returns:
        The corrected poem as a single newline-joined string, or ``False``
        when the image download or the image-to-text conversion reports
        failure (both helpers signal failure by returning ``False``).
    """
    # Get image of the poetry
    f_image = download_image(get_poetry_img_url(url, language))
    if f_image is False:
        print('ERR:: failed to download image')
        return False

    # Get text from the poetry image. The downloaded file is only needed for
    # this call, so delete it in ``finally``: this also cleans up when the
    # conversion fails or raises, instead of leaving the file behind.
    try:
        ret = image_to_text_for_reindeer(f_image, language)
    finally:
        try:
            os.remove(f_image)
        except OSError:
            # Best effort: a leftover file must not abort the refinement.
            print('ERR:: failed to delete f_image')

    if ret is False:
        print('ERR: falied to convert image to text')
        return False
    ocr_text = ret.strip()

    # Make lists of non-empty lines from ``ocr_text`` and ``crawled_poetry``
    ocr_lines = [line for line in ocr_text.split('\n') if line]
    plain_text = html_to_plain_text(crawled_poetry)
    crawled_lines = [line for line in plain_text.split('\n') if line]

    # Correct the line order of ``crawled_poetry``
    poetry_lines = correct_poetry_lines_order(crawled_lines, ocr_lines)
    return '\n'.join(poetry_lines)
def article_to_poetry(request):
    """Create poetries in Repository from given RawArticles.

    Only AJAX POST requests are served; any other request receives
    ``HttpResponseForbidden``. The parameters are read from the URL query
    string (``request.GET``):

    * ``creator``  - primary key of the ``Person`` set as creator.
    * ``articles`` - comma-separated primary keys of ``RawArticle`` objects.

    For every article found, a ``Poetry`` is saved, the article is marked
    valid, and ``sig_action`` is sent to log the addition.

    Returns:
        A ``JsonResponse`` with the keys ``result`` and ``count``:

        * ``success`` - ``count`` is the number of poetries created.
        * ``failure`` - a parameter is missing or the creator does not exist.
        * ``error``   - an unexpected exception occurred; ``count`` is 0 even
          if some poetries were saved before the exception.

    NOTE(review): as seen in this file, a second definition with the same
    name follows this one and is the binding that takes effect at import.
    """
    # Only AJAX POST request is allowed
    if not (request.is_ajax() and request.method == "POST"):
        return HttpResponseForbidden()

    # Check the parameters passed in the URL and process accordingly
    creator_id = request.GET.get('creator', None)
    article_ids = request.GET.get('articles', None)
    if creator_id is None or article_ids is None:
        print("Error: article_to_poetry: No parameter(s) passed.")
        return JsonResponse({'result': 'failure', 'count': 0})

    creator_id = creator_id.strip()
    # Drop every empty id (trailing comma, doubled commas, blanks), not only
    # a trailing one, so that '' is never used as a primary key.
    article_ids = [x.strip() for x in article_ids.split(',')]
    article_ids = [x for x in article_ids if x]
    print("article_to_poetry: creator= %s articles= %s"
          % (creator_id, article_ids))

    count = 0
    try:
        # ``get`` raises ``DoesNotExist`` when nothing matches; it never
        # returns a falsy value, so the lookup failure is handled below.
        person = Person.objects.get(pk=creator_id)
        print("article_to_poetry: found  %s  in person@repository"
              % (person.name,))

        ct = ContentType.objects.get(app_label="repository", model="poetry")

        # Add all 'articles' to Poetry with 'person' as 'creator' field
        for article_id in article_ids:
            try:
                article = RawArticle.objects.get(pk=article_id)
            except RawArticle.DoesNotExist:
                # Skip unknown ids instead of aborting the whole batch.
                print("article_to_poetry: RawArticle %s not found, skipped"
                      % (article_id,))
                continue

            # Create a new poetry
            poetry = Poetry()
            poetry.name = article.title
            poetry.language = article.language
            poetry.body = html_to_plain_text(article.content)
            poetry.creator = person
            poetry.same_as = article.source_url
            poetry.added_by = request.user
            poetry.modified_by = request.user
            poetry.date_modified = timezone.now()
            poetry.save()
            print("article_to_poetry: RawArticle  %s -> Poetry  %s"
                  % (article_id, poetry.id))

            # Make the raw_article valid
            article.valid = True
            article.save()
            count += 1

            # Send signal to log the action
            sig_action.send(request.user,
                            timestamp=poetry.date_added,
                            verb=VERBS['ADDITION'],
                            content_type=ct,
                            object_id=poetry.pk,
                            object_repr=poetry.name,
                            change_message=None,
                            public=True)
    except Person.DoesNotExist:
        # Return failure, means You have to add the entry of Person first
        print("No such creator/person found in person@repository")
        return JsonResponse({'result': 'failure', 'count': 0})
    except Exception:
        # Top-level boundary of the view: report and answer with 'error'
        # rather than letting the exception escape.
        exc_type, _, exc_tb = sys.exc_info()
        print("Error: Unexpected error: %s" % (exc_type,))
        for frame in traceback.extract_tb(exc_tb):
            print("DBG:: Error in %s on line %d" % (frame[0], frame[1]))
        return JsonResponse({'result': 'error', 'count': 0})

    # Return success with number of poetry made
    print("article_to_poetry: Total %s" % (count,))
    return JsonResponse({'result': 'success', 'count': count})
def article_to_poetry(request):
    """Create poetries in Repository from given RawArticles.

    Only AJAX POST requests are served; any other request receives
    ``HttpResponseForbidden``. The parameters are read from the URL query
    string (``request.GET``):

    * ``creator``  - primary key of the ``Person`` set as creator.
    * ``articles`` - comma-separated primary keys of ``RawArticle`` objects.

    For every article found, a ``Poetry`` is saved, the article is marked
    valid, and ``sig_action`` is sent to log the addition.

    Returns:
        A ``JsonResponse`` with the keys ``result`` and ``count``:

        * ``success`` - ``count`` is the number of poetries created.
        * ``failure`` - a parameter is missing or the creator does not exist.
        * ``error``   - an unexpected exception occurred; ``count`` is 0 even
          if some poetries were saved before the exception.

    NOTE(review): as seen in this file, an earlier definition with the same
    name precedes this one; this later definition is the one that takes
    effect at import.
    """
    # Only AJAX POST request is allowed
    if not (request.is_ajax() and request.method == "POST"):
        return HttpResponseForbidden()

    # Check the parameters passed in the URL and process accordingly
    creator_id = request.GET.get('creator', None)
    article_ids = request.GET.get('articles', None)
    if creator_id is None or article_ids is None:
        print("Error: article_to_poetry: No parameter(s) passed.")
        return JsonResponse({'result': 'failure', 'count': 0})

    creator_id = creator_id.strip()
    # Drop every empty id (trailing comma, doubled commas, blanks), not only
    # a trailing one, so that '' is never used as a primary key.
    article_ids = [x.strip() for x in article_ids.split(',')]
    article_ids = [x for x in article_ids if x]
    print("article_to_poetry: creator= %s articles= %s"
          % (creator_id, article_ids))

    count = 0
    try:
        # ``get`` raises ``DoesNotExist`` when nothing matches; it never
        # returns a falsy value, so the lookup failure is handled below.
        person = Person.objects.get(pk=creator_id)
        print("article_to_poetry: found  %s  in person@repository"
              % (person.name,))

        ct = ContentType.objects.get(app_label="repository", model="poetry")

        # Add all 'articles' to Poetry with 'person' as 'creator' field
        for article_id in article_ids:
            try:
                article = RawArticle.objects.get(pk=article_id)
            except RawArticle.DoesNotExist:
                # Skip unknown ids instead of aborting the whole batch.
                print("article_to_poetry: RawArticle %s not found, skipped"
                      % (article_id,))
                continue

            # Create a new poetry
            poetry = Poetry()
            poetry.name = article.title
            poetry.language = article.language
            poetry.body = html_to_plain_text(article.content)
            poetry.creator = person
            poetry.same_as = article.source_url
            poetry.added_by = request.user
            poetry.modified_by = request.user
            poetry.date_modified = timezone.now()
            poetry.save()
            print("article_to_poetry: RawArticle  %s -> Poetry  %s"
                  % (article_id, poetry.id))

            # Make the raw_article valid
            article.valid = True
            article.save()
            count += 1

            # Send signal to log the action
            sig_action.send(request.user,
                            timestamp=poetry.date_added,
                            verb=VERBS['ADDITION'],
                            content_type=ct,
                            object_id=poetry.pk,
                            object_repr=poetry.name,
                            change_message=None,
                            public=True)
    except Person.DoesNotExist:
        # Return failure, means You have to add the entry of Person first
        print("No such creator/person found in person@repository")
        return JsonResponse({'result': 'failure', 'count': 0})
    except Exception:
        # Top-level boundary of the view: report and answer with 'error'
        # rather than letting the exception escape.
        exc_type, _, exc_tb = sys.exc_info()
        print("Error: Unexpected error: %s" % (exc_type,))
        for frame in traceback.extract_tb(exc_tb):
            print("DBG:: Error in %s on line %d" % (frame[0], frame[1]))
        return JsonResponse({'result': 'error', 'count': 0})

    # Return success with number of poetry made
    print("article_to_poetry: Total %s" % (count,))
    return JsonResponse({'result': 'success', 'count': count})