Ejemplo n.º 1
0
def _text_at(elements, index, default):
    """Return ``elements[index].text``, or ``default`` if it is unavailable."""
    try:
        return elements[index].text
    except (IndexError, AttributeError):
        return default


def scrap(request):
    """Scrape per-pin metadata from the URL submitted through ``URLForm``.

    On a valid POST the submitted URL is downloaded once and parsed with
    BeautifulSoup. One dict is appended to ``content_list`` for every
    ``div`` whose class is ``'pinMeta '``, holding the keys
    ``description``, ``repincount``, ``pinlink``, ``pincomment`` and
    ``pinnedfrom``. Counters and credit titles are matched to a row by
    position in the document; a missing value falls back to ``0`` (or
    ``"No description"`` for the description).

    Returns the rendered ``404.html`` template when the download raises
    ``HTTPError`` and the rendered ``500.html`` template when it raises
    ``URLError``.
    """
    content_list = []
    form = URLForm()  # An unbound form

    if request.method == 'POST':  # If the form has been submitted...
        form = URLForm(request.POST)  # A form bound to the POST data
        if form.is_valid():  # All validation rules pass
            url = form.cleaned_data['url']
            try:
                # Download the page exactly once and always release the
                # connection, even if reading the body fails.
                response = urlopen(Request(url))
                try:
                    html = response.read()
                finally:
                    response.close()
            except HTTPError:
                # HTTPError subclasses URLError, so it must be handled first.
                template = "404.html"
                return render_to_response(
                    template, context_instance=RequestContext(request))
            except URLError:
                template = "500.html"
                return render_to_response(
                    template, context_instance=RequestContext(request))

            source = BeautifulSoup(html)

            # These document-wide lookups do not depend on the current row,
            # so run them once instead of once per row.
            repin_counts = source(
                'em', {'class': 'socialMetaCount repinCountSmall'})
            like_counts = source(
                'em', {'class': 'socialMetaCount likeCountSmall'})
            comment_counts = source(
                'em', {'class': 'socialMetaCount commentCountSmall'})
            credit_titles = source('div', {'class': 'creditTitle'})

            # Read the item block
            for count, row in enumerate(source('div', {'class': 'pinMeta '})):
                try:
                    description = row.find('p').text
                except AttributeError:
                    # row.find('p') returned None: the block has no <p>.
                    description = "No description"

                content_list.append({
                    'description': description,
                    'repincount': _text_at(repin_counts, count, 0),
                    # NOTE(review): 'pinlink' is filled from the like counter;
                    # the key is kept because consumers may rely on it.
                    'pinlink': _text_at(like_counts, count, 0),
                    'pincomment': _text_at(comment_counts, count, 0),
                    'pinnedfrom': _text_at(credit_titles, count, 0),
                })
    # NOTE(review): no response is returned past this point in the visible
    # code, so this path yields None - confirm how content_list is rendered.
Ejemplo n.º 2
0
def _stat_text(spans, index, zero_label):
    """Return the stripped text of ``spans[index]``, or ``zero_label``.

    ``zero_label`` is returned when the span is missing, when it has no
    single string child (``.string`` is None), or when its raw text is
    exactly one character long, which the page markup is assumed to use
    for an empty counter (TODO confirm against the scraped site).
    """
    try:
        text = spans[index].string
    except IndexError:
        return zero_label
    if text is None or len(text) == 1:
        return zero_label
    return text.strip()


def scrap(request):
    """Scrape descriptions and like/comment/repin counters from a URL.

    On a valid POST the submitted URL is downloaded and parsed with
    BeautifulSoup. Descriptions come from ``p.description`` elements and
    the three counters from the first three ``span`` children of every
    ``p`` whose class is ``'stats colorless'``. The four lists are zipped
    into ``final_result``, so exactly one entry per list is appended for
    each stats row to keep them aligned.

    Always renders ``home.html`` with a fresh unbound ``URLForm`` and the
    ``final_result`` list (empty when nothing was scraped).
    """
    desciption_list = []
    like_list = []
    comment_list = []
    repin_list = []

    if request.method == 'POST':  # If the form has been submitted...
        form = URLForm(request.POST)  # A form bound to the POST data
        if form.is_valid():  # All validation rules pass
            url = form.cleaned_data['url']
            response = urllib2.urlopen(url)
            try:
                source = BeautifulSoup(response.read())
            finally:
                # Release the connection even if reading or parsing fails.
                response.close()

            # extracting description
            desciption_list.extend(
                row.string for row in source('p', {'class': 'description'}))

            for row in source('p', {'class': 'stats colorless'}):
                spans = row.findAll('span')
                # One append per list per row: the previous try/except/else
                # appended two repin entries when the span text had length 1,
                # which shifted every following repin value in the zip below.
                like_list.append(_stat_text(spans, 0, "0 like"))
                comment_list.append(_stat_text(spans, 1, "0 comment"))
                repin_list.append(_stat_text(spans, 2, "0 repin"))

    # The keys (including the 'desciption_list' spelling) are kept as-is
    # because the template may read them.
    final_result = [
        {"desciption_list": d, "like_list": l, "comment_list": c, "repin_list": r}
        for d, l, c, r in zip(desciption_list, like_list, comment_list, repin_list)
    ]
    form = URLForm()  # An unbound form

    data = {
        'form': form,
        'final_result': final_result,
    }
    template = "home.html"
    return render_to_response(template, data,
                              context_instance=RequestContext(request))