Exemple #1
0
def refine_poetry(crawled_poetry, url, language):
    '''
    Rebuild a crawled poetry with its lines ordered like the poetry image.

    The image of the poetry is downloaded and converted to text (OCR).
    The non-empty lines of the plain-text version of ``crawled_poetry``
    and the non-empty OCR lines are then handed to
    ``correct_poetry_lines_order``, and the lines it returns are joined
    with newlines.

    crawled_poetry -- HTML of the poetry, as crawled
    url            -- passed to ``get_poetry_img_url`` to locate the image
    language       -- passed to the image-url lookup and to the OCR helper

    Returns the refined poetry as a single string, or ``False`` when the
    image can not be downloaded or can not be converted to text.
    '''
    # Get image of the poetry
    f_image = download_image(get_poetry_img_url(url, language))
    if f_image is False:
        print('ERR:: failed to download image')
        return False

    # Get text from poetry image. The downloaded file is needed for this
    # step only, so it is deleted whether the conversion succeeds, reports
    # failure or raises -- otherwise failed conversions leave files behind.
    try:
        ret = image_to_text_for_reindeer(f_image, language)
    finally:
        try:
            os.remove(f_image)
        except OSError:
            # Best effort: a leftover file must not abort the refinement
            print('ERR:: failed to delete f_image')

    if ret is False:
        print('ERR: falied to convert image to text')
        return False

    # Make list of non-empty lines from ``ocr_text`` and ``crawled_poetry``
    ocr_text = ret.strip()
    ocr_lines = [line for line in ocr_text.split('\n') if line]

    plain_text = html_to_plain_text(crawled_poetry)
    crawled_lines = [line for line in plain_text.split('\n') if line]

    # Correct the line order of ``crawled_poetry``
    poetry_lines = correct_poetry_lines_order(crawled_lines, ocr_lines)
    return '\n'.join(poetry_lines)
Exemple #2
0
def refine_poetry(crawled_poetry, url, language):
    '''
    Rebuild a crawled poetry with its lines ordered like the poetry image.

    The image of the poetry is downloaded and converted to text (OCR).
    The non-empty lines of the plain-text version of ``crawled_poetry``
    and the non-empty OCR lines are then handed to
    ``correct_poetry_lines_order``, and the lines it returns are joined
    with newlines.

    crawled_poetry -- HTML of the poetry, as crawled
    url            -- passed to ``get_poetry_img_url`` to locate the image
    language       -- passed to the image-url lookup and to the OCR helper

    Returns the refined poetry as a single string, or ``False`` when the
    image can not be downloaded or can not be converted to text.
    '''
    # Get image of the poetry
    f_image = download_image(get_poetry_img_url(url, language))
    if f_image is False:
        print('ERR:: failed to download image')
        return False

    # Get text from poetry image. The downloaded file is needed for this
    # step only, so it is deleted whether the conversion succeeds, reports
    # failure or raises -- otherwise failed conversions leave files behind.
    try:
        ret = image_to_text_for_reindeer(f_image, language)
    finally:
        try:
            os.remove(f_image)
        except OSError:
            # Best effort: a leftover file must not abort the refinement
            print('ERR:: failed to delete f_image')

    if ret is False:
        print('ERR: falied to convert image to text')
        return False

    # Make list of non-empty lines from ``ocr_text`` and ``crawled_poetry``
    ocr_text = ret.strip()
    ocr_lines = [line for line in ocr_text.split('\n') if line]

    plain_text = html_to_plain_text(crawled_poetry)
    crawled_lines = [line for line in plain_text.split('\n') if line]

    # Correct the line order of ``crawled_poetry``
    poetry_lines = correct_poetry_lines_order(crawled_lines, ocr_lines)
    return '\n'.join(poetry_lines)
Exemple #3
0
def article_to_poetry(request):
    '''
    Create poetries in Repository from given RawArticles.

    Only AJAX POST requests are served; any other request gets
    ``HttpResponseForbidden``. The parameters are read from the URL query
    string (``request.GET``):

    creator  -- primary key of the ``Person`` used as ``creator`` of every
                poetry that gets created
    articles -- comma separated primary keys of the ``RawArticle`` objects
                to convert; empty entries are ignored

    Every converted article is marked ``valid`` and an ``ADDITION`` action
    is sent through ``sig_action`` for the new poetry.

    Returns a ``JsonResponse`` with the keys ``result`` and ``count``:

    success -- ``count`` is the number of poetries created
    failure -- a parameter is missing, or no ``Person`` has the given id
    error   -- an unexpected exception was raised (``count`` is 0)

    NOTE(review): nothing in this function rolls back poetries saved
    before an unexpected error; confirm whether the view runs inside a
    transaction.
    '''
    # Only AJAX POST request is allowed
    if not (request.is_ajax() and request.method == "POST"):
        return HttpResponseForbidden()

    # The parameters travel in the URL even though the request is a POST
    creator_id = request.GET.get('creator', None)
    article_ids = request.GET.get('articles', None)

    if creator_id is None or article_ids is None:
        print("Error: article_to_poetry: No parameter(s) passed.")
        return JsonResponse({'result': 'failure', 'count': 0})

    # Drop every empty entry, not just the one left by a trailing comma;
    # an empty id in the middle would abort the batch half way through.
    article_ids = [x.strip() for x in article_ids.split(',') if x.strip()]
    creator_id = creator_id.strip()
    print("article_to_poetry: creator= %s articles= %s"
          % (creator_id, article_ids))

    count = 0

    try:
        # ``get()`` raises ``DoesNotExist`` instead of returning nothing,
        # so a missing creator has to be caught to be reported as failure.
        try:
            person = Person.objects.get(pk=creator_id)
        except Person.DoesNotExist:
            # Means you have to add the entry of Person first
            print("No such creator/person found in person@repository")
            return JsonResponse({'result': 'failure', 'count': 0})

        # Add all 'articles' to Poetry with 'person' as 'creator' field
        print("article_to_poetry: found  %s  in person@repository"
              % (person.name,))
        ct = ContentType.objects.get(app_label="repository",
                                     model="poetry")

        for article_id in article_ids:
            article = RawArticle.objects.get(pk=article_id)

            # Create a new poetry
            poetry = Poetry()
            poetry.name = article.title
            poetry.language = article.language
            poetry.body = html_to_plain_text(article.content)
            poetry.creator = person
            poetry.same_as = article.source_url
            poetry.added_by = request.user
            poetry.modified_by = request.user
            poetry.date_modified = timezone.now()
            poetry.save()
            print("article_to_poetry: RawArticle  %s -> Poetry  %s"
                  % (article_id, poetry.id))

            # Make the raw_article valid
            article.valid = True
            article.save()
            count += 1

            # Send signal to log the action
            sig_action.send(request.user,
                            timestamp=poetry.date_added,
                            verb=VERBS['ADDITION'],
                            content_type=ct,
                            object_id=poetry.pk,
                            object_repr=poetry.name,
                            change_message=None,
                            public=True)

        # Return success with number of poetry made
        print("article_to_poetry: Total %s" % (count,))
        return JsonResponse({'result': 'success', 'count': count})

    except Exception:
        # Last line of defence for the AJAX caller: log where it went
        # wrong and answer with JSON instead of a server error page.
        exc_type, _exc_value, exc_tb = sys.exc_info()
        print("Error: Unexpected error: %s" % (exc_type,))
        for fname, lineno, _fn, _text in traceback.extract_tb(exc_tb):
            print("DBG:: Error in %s on line %d" % (fname, lineno))
        return JsonResponse({'result': 'error', 'count': 0})
Exemple #4
0
def article_to_poetry(request):
    '''
    Create poetries in Repository from given RawArticles.

    Only AJAX POST requests are served; any other request gets
    ``HttpResponseForbidden``. The parameters are read from the URL query
    string (``request.GET``):

    creator  -- primary key of the ``Person`` used as ``creator`` of every
                poetry that gets created
    articles -- comma separated primary keys of the ``RawArticle`` objects
                to convert; empty entries are ignored

    Every converted article is marked ``valid`` and an ``ADDITION`` action
    is sent through ``sig_action`` for the new poetry.

    Returns a ``JsonResponse`` with the keys ``result`` and ``count``:

    success -- ``count`` is the number of poetries created
    failure -- a parameter is missing, or no ``Person`` has the given id
    error   -- an unexpected exception was raised (``count`` is 0)

    NOTE(review): nothing in this function rolls back poetries saved
    before an unexpected error; confirm whether the view runs inside a
    transaction.
    '''
    # Only AJAX POST request is allowed
    if not (request.is_ajax() and request.method == "POST"):
        return HttpResponseForbidden()

    # The parameters travel in the URL even though the request is a POST
    creator_id = request.GET.get('creator', None)
    article_ids = request.GET.get('articles', None)

    if creator_id is None or article_ids is None:
        print("Error: article_to_poetry: No parameter(s) passed.")
        return JsonResponse({'result': 'failure', 'count': 0})

    # Drop every empty entry, not just the one left by a trailing comma;
    # an empty id in the middle would abort the batch half way through.
    article_ids = [x.strip() for x in article_ids.split(',') if x.strip()]
    creator_id = creator_id.strip()
    print("article_to_poetry: creator= %s articles= %s"
          % (creator_id, article_ids))

    count = 0

    try:
        # ``get()`` raises ``DoesNotExist`` instead of returning nothing,
        # so a missing creator has to be caught to be reported as failure.
        try:
            person = Person.objects.get(pk=creator_id)
        except Person.DoesNotExist:
            # Means you have to add the entry of Person first
            print("No such creator/person found in person@repository")
            return JsonResponse({'result': 'failure', 'count': 0})

        # Add all 'articles' to Poetry with 'person' as 'creator' field
        print("article_to_poetry: found  %s  in person@repository"
              % (person.name,))
        ct = ContentType.objects.get(app_label="repository",
                                     model="poetry")

        for article_id in article_ids:
            article = RawArticle.objects.get(pk=article_id)

            # Create a new poetry
            poetry = Poetry()
            poetry.name = article.title
            poetry.language = article.language
            poetry.body = html_to_plain_text(article.content)
            poetry.creator = person
            poetry.same_as = article.source_url
            poetry.added_by = request.user
            poetry.modified_by = request.user
            poetry.date_modified = timezone.now()
            poetry.save()
            print("article_to_poetry: RawArticle  %s -> Poetry  %s"
                  % (article_id, poetry.id))

            # Make the raw_article valid
            article.valid = True
            article.save()
            count += 1

            # Send signal to log the action
            sig_action.send(request.user,
                            timestamp=poetry.date_added,
                            verb=VERBS['ADDITION'],
                            content_type=ct,
                            object_id=poetry.pk,
                            object_repr=poetry.name,
                            change_message=None,
                            public=True)

        # Return success with number of poetry made
        print("article_to_poetry: Total %s" % (count,))
        return JsonResponse({'result': 'success', 'count': count})

    except Exception:
        # Last line of defence for the AJAX caller: log where it went
        # wrong and answer with JSON instead of a server error page.
        exc_type, _exc_value, exc_tb = sys.exc_info()
        print("Error: Unexpected error: %s" % (exc_type,))
        for fname, lineno, _fn, _text in traceback.extract_tb(exc_tb):
            print("DBG:: Error in %s on line %d" % (fname, lineno))
        return JsonResponse({'result': 'error', 'count': 0})