Beispiel #1
0
def mazda3_grab(threaddb='mazda3_threadas', debug=True):
    """Crawl every board linked from the forum index and record its threads.

    The ``mazda3`` helper object supplies the XPath expressions and the
    parsing helpers used below.  For each board link the pages are walked
    one after another; every thread row that has a description yields a
    record of ``(column, value)`` tuples with the keys ``Description``,
    ``Views``, ``Replies`` and ``Link``, and the thread itself is handed to
    ``mazda3_thread_grab``.

    Args:
        threaddb: Table name passed to ``DBManager.insert_into_table``.
        debug: When true, records are printed and no ``DBManager`` is
            opened.  When false, records are inserted into ``threaddb``.
            The flag is forwarded to ``mazda3_thread_grab`` so that a
            non-debug run persists the posts as well.

    Returns:
        None.
    """
    bmw = mazda3()
    main_page = xmlTree(bmw.domain)
    board_links = main_page.xpath(bmw.linklist)

    udb = None if debug else DBManager()
    try:
        for lvl1_link in board_links:
            next_page = xmlTree(lvl1_link)

            # Split "...board=<id>.<offset>" into the URL prefix and the
            # dot-separated id parts that addto() turns into page URLs.
            pos = lvl1_link.find('board=') + len('board=')
            part = lvl1_link[:pos]
            ifid = lvl1_link[pos:].split('.')

            try:
                lastpage = int(
                    next_page.xpath(bmw.last_page_threads)[0].text_content())
            except IndexError:
                # No pager element on the page: treat it as a single page.
                lastpage = 1

            count = 0
            while count < lastpage:
                for row in next_page.xpath(bmw.threaddata):
                    description = row.xpath(bmw.description)
                    if not description:
                        # Rows without a description carry no thread data;
                        # skip them instead of storing an empty record.
                        continue

                    # Evaluate the stats cell and the link once, not once
                    # per field.
                    stats = bmw.parse_stats(
                        row.xpath(bmw.stats)[0].text_content())
                    link = row.xpath(bmw.link)[0]
                    threaddata = [
                        ('Description',
                         description[0].text_content().encode('utf-8')),
                        ('Views', stats['Views']),
                        # Previously stored under a second 'Views' key.
                        ('Replies', stats['Replies']),
                        ('Link', link),
                    ]

                    # NOTE(review): mazda3_thread_grab searches its argument
                    # for 'topic=' -- confirm thread_id() returns a URL.
                    mazda3_thread_grab(bmw.thread_id(link), debug=debug)

                    if debug:
                        print(threaddata)
                    else:
                        udb.insert_into_table(threaddb, threaddata)

                count += 1
                print(count)
                if count < lastpage:
                    # Only fetch a following page when one exists; the page
                    # after the last one was downloaded and never used.
                    next_url = part + addto(ifid, count)
                    print(next_url)
                    next_page = xmlTree(next_url)
    finally:
        # Release the DB handle even when scraping raises midway.
        if udb is not None:
            udb.close()
Beispiel #2
0
def mazda3_thread_grab(url, postsdb='mazda3_posts', debug=True):
    """Walk all pages of one thread and record every post on them.

    The ``mazda3`` helper object supplies the XPath expressions and the
    parsing helpers used below.  Each post yields a record of
    ``(column, value)`` tuples with the keys ``TimeOfPost``, ``PosterID``,
    ``PostID``, ``Link``, ``PostCountInThread`` and ``ThreadID``.  The
    poster's profile link is additionally handed to ``mazda3_user_grab``.

    Args:
        url: Thread URL; it must contain ``topic=`` followed by the
            dot-separated id that ``addto`` uses to build page URLs.
        postsdb: Table name passed to ``DBManager.insert_into_table``.
        debug: When true, records are printed and no ``DBManager`` is
            opened.  When false, records are inserted into ``postsdb``.
            The flag is forwarded to ``mazda3_user_grab`` so that a
            non-debug run persists the users as well.

    Returns:
        None.
    """
    bimm = mazda3()
    next_page = xmlTree(url)

    pos = url.find('topic=') + len('topic=')
    part = url[:pos]
    ifid = url[pos:].split('.')

    try:
        lastpage = int(
            next_page.xpath(bimm.last_page_threads)[0].text_content())
    except IndexError:
        # No pager element on the page: treat it as a single page.
        lastpage = 1

    udb = None if debug else DBManager()
    try:
        count = 0
        while count < lastpage:
            for pst in next_page.xpath(bimm.postdata):
                posted = bimm.time_of_post(
                    pst.xpath(bimm.timeofpost)[0].text_content())
                # Looked up once; used for the record and the user grab.
                poster_link = pst.xpath(bimm.posterid)[0]
                threaddata = [
                    ('TimeOfPost',
                     bimm.convert_to_valid_date(bimm.last_activity(posted))),
                    ('PosterID', bimm.poster_id(poster_link)),
                    ('PostID', bimm.post_id(pst.xpath(bimm.postid)[0])),
                    ('Link', pst.xpath(bimm.postlink)[0]),
                    ('PostCountInThread',
                     bimm.post_count(
                         pst.xpath(bimm.postcount)[0].text_content())),
                    ('ThreadID',
                     bimm.thread_id(pst.xpath(bimm.post_thread)[0])),
                ]

                mazda3_user_grab(poster_link, 'mazda3_users', debug)

                if debug:
                    print(threaddata)
                else:
                    udb.insert_into_table(postsdb, threaddata)

            count += 1
            if count < lastpage:
                # Only fetch a following page when one exists; the page
                # after the last one was downloaded and never used.
                next_page = xmlTree(part + addto(ifid, count))
    finally:
        # Release the DB handle even when scraping raises midway.
        if udb is not None:
            udb.close()
Beispiel #3
0
def mazda3_grab(threaddb = 'mazda3_threadas', debug =True):
    """Scrape the thread listings of every board found on the index page.

    XPath expressions and parsing helpers come from the ``mazda3`` helper
    object.  Every board link is paged through; each thread row that has a
    description is turned into a list of ``(column, value)`` tuples
    (``Description``, ``Views``, ``Replies``, ``Link``) and the thread is
    passed on to ``mazda3_thread_grab``.

    Args:
        threaddb: Table name given to ``DBManager.insert_into_table``.
        debug: True prints each record and opens no database handle;
            False inserts each record into ``threaddb``.  The same flag is
            handed to ``mazda3_thread_grab``.

    Returns:
        None.
    """
    bmw = mazda3()
    main_page = xmlTree(bmw.domain)
    lvl1 = main_page.xpath(bmw.linklist)

    udb = DBManager() if not debug else None
    try:
        for lvl1_link in lvl1:
            next_page = xmlTree(lvl1_link)

            # URL prefix up to and including "board=", plus the
            # dot-separated remainder that addto() combines with a page
            # counter.
            pos = lvl1_link.find('board=') + len('board=')
            part = lvl1_link[:pos]
            ifid = lvl1_link[pos:].split('.')

            try:
                lastpage = int(next_page.xpath(bmw.last_page_threads)[0].text_content())
            except IndexError:
                # Pager element missing -> only one page to read.
                lastpage = 1

            count = 0
            while count < lastpage:
                for j in next_page.xpath(bmw.threaddata):
                    description = j.xpath(bmw.description)
                    if not description:
                        # Nothing to record for this row; the old code
                        # passed an empty record to the database here.
                        continue

                    stats = bmw.parse_stats(j.xpath(bmw.stats)[0].text_content())
                    link = j.xpath(bmw.link)[0]

                    threaddata = []
                    threaddata.append(('Description', description[0].text_content().encode('utf-8')))
                    threaddata.append(('Views', stats['Views']))
                    # The reply count used to be stored as a second 'Views'.
                    threaddata.append(('Replies', stats['Replies']))
                    threaddata.append(('Link', link))

                    # NOTE(review): confirm that thread_id() yields
                    # something xmlTree() can fetch as a thread page.
                    mazda3_thread_grab(bmw.thread_id(link), debug=debug)

                    if not debug:
                        udb.insert_into_table(threaddb, threaddata)
                    else:
                        print(threaddata)

                count += 1
                print(count)
                if count < lastpage:
                    # Skip the download of the page past the last one; it
                    # was never read.
                    next_url = part + addto(ifid, count)
                    print(next_url)
                    next_page = xmlTree(next_url)
    finally:
        # Close the handle on both the normal and the error path.
        if udb is not None:
            udb.close()
Beispiel #4
0
def mazda3_user_grab(url, userdb = 'mazda3_users',debug = True):
    """Scrape one user profile page and record the fields found on it.

    The page is fetched through ``xmlTree_w_login``; XPath expressions and
    parsing helpers come from the ``mazda3`` helper object.  Only fields
    whose XPath matches are added, as ``(column, value)`` tuples with the
    keys ``Location``, ``Cars``, ``TotalPosts``, ``LastActivity``,
    ``JoinDate``, ``Handle``, ``NegativeFeedback`` and ``PositiveFeedback``.

    Args:
        url: Profile URL handed to ``xmlTree_w_login``.
        userdb: Table name passed to ``DBManager.insert_into_table``.
        debug: When true the record is printed; when false it is inserted
            into ``userdb``.

    Returns:
        None.  Nothing is printed or stored when the page is falsy.
    """
    bimm = mazda3()
    # NOTE(review): the three literals look like a site key plus login
    # credentials -- confirm, and move them into configuration rather than
    # keeping them in source.
    main_page = xmlTree_w_login(url, 'mazda', 'somebodyis', 'anybodyanybody')
    if not main_page:
        return

    userdata = []

    location = main_page.xpath(bimm.location)
    if location:
        userdata.append(('Location', location[0].text_content().strip()))

    car_make = main_page.xpath(bimm.car_make)
    if car_make:
        cars = car_make[0].text_content()
        # The year is optional: indexing it unconditionally raised
        # IndexError for profiles that list a make but no year.
        car_year = main_page.xpath(bimm.car_year)
        if car_year:
            cars += car_year[0].text_content()
        userdata.append(('Cars', cars))

    noposts = main_page.xpath(bimm.noposts)
    if noposts:
        userdata.append(('TotalPosts', bimm.total_posts(noposts[0].text_content())))

    lastac = main_page.xpath(bimm.lastac)
    if lastac:
        userdata.append(('LastActivity', bimm.convert_to_valid_date(
            bimm.last_activity(lastac[0].text_content()))))

    joindate = main_page.xpath(bimm.joindate)
    if joindate:
        userdata.append(('JoinDate', bimm.convert_to_valid_date(
            bimm.join_date(joindate[0].text_content()))))

    handle = main_page.xpath(bimm.handle)
    if handle:
        # get_handle receives the matched node itself, not its text.
        userdata.append(('Handle', bimm.get_handle(handle[0])))

    minus_fb = main_page.xpath(bimm.minus_fb)
    if minus_fb:
        userdata.append(('NegativeFeedback', minus_fb[0].text_content().strip()))

    plus_fb = main_page.xpath(bimm.plus_fb)
    if plus_fb:
        userdata.append(('PositiveFeedback', plus_fb[0].text_content().strip()))

    if debug:
        print(userdata)
        return

    # Open the handle only once there is something to store, and close it
    # even if the insert raises.
    udb = DBManager()
    try:
        udb.insert_into_table(userdb, userdata)
    finally:
        udb.close()
Beispiel #5
0
def mazda3_thread_grab(url, postsdb = 'mazda3_posts', debug = True):
    """Page through a single thread and record each post.

    XPath expressions and parsing helpers come from the ``mazda3`` helper
    object.  A post becomes a list of ``(column, value)`` tuples
    (``TimeOfPost``, ``PosterID``, ``PostID``, ``Link``,
    ``PostCountInThread``, ``ThreadID``); the poster's profile link is also
    passed to ``mazda3_user_grab``.

    Args:
        url: Thread URL containing ``topic=`` followed by the dot-separated
            id that ``addto`` combines with a page counter.
        postsdb: Table name given to ``DBManager.insert_into_table``.
        debug: True prints each record and opens no database handle;
            False inserts each record into ``postsdb``.  The same flag is
            handed to ``mazda3_user_grab``.

    Returns:
        None.
    """
    bimm = mazda3()
    next_page = xmlTree(url)

    pos = url.find('topic=') + len('topic=')
    part = url[:pos]
    ifid = url[pos:].split('.')

    try:
        lastpage = int(next_page.xpath(bimm.last_page_threads)[0].text_content())
    except IndexError:
        # Pager element missing -> only one page to read.
        lastpage = 1

    udb = DBManager() if not debug else None
    try:
        count = 0
        while count < lastpage:
            for pst in next_page.xpath(bimm.postdata):
                threaddata = []
                time_text = pst.xpath(bimm.timeofpost)[0].text_content()
                threaddata.append(('TimeOfPost', bimm.convert_to_valid_date(
                    bimm.last_activity(bimm.time_of_post(time_text)))))

                # Evaluated once and reused for the user grab below.
                poster = pst.xpath(bimm.posterid)[0]
                threaddata.append(('PosterID', bimm.poster_id(poster)))
                threaddata.append(('PostID', bimm.post_id(pst.xpath(bimm.postid)[0])))
                threaddata.append(('Link', pst.xpath(bimm.postlink)[0]))
                threaddata.append(('PostCountInThread', bimm.post_count(
                    pst.xpath(bimm.postcount)[0].text_content())))
                threaddata.append(('ThreadID', bimm.thread_id(pst.xpath(bimm.post_thread)[0])))

                mazda3_user_grab(poster, 'mazda3_users', debug)

                if not debug:
                    udb.insert_into_table(postsdb, threaddata)
                else:
                    print(threaddata)

            count += 1
            if count < lastpage:
                # Skip the download of the page past the last one; it was
                # never read.
                next_page = xmlTree(part + addto(ifid, count))
    finally:
        # Close the handle on both the normal and the error path.
        if udb is not None:
            udb.close()
Beispiel #6
0
def mazda3_user_grab(url, userdb='mazda3_users', debug=True):
    """Collect the profile fields of one forum user.

    The profile page is loaded with ``xmlTree_w_login`` and queried with
    the XPath expressions of the ``mazda3`` helper object.  Each field
    whose XPath matches contributes one ``(column, value)`` tuple; the
    possible columns are ``Location``, ``Cars``, ``TotalPosts``,
    ``LastActivity``, ``JoinDate``, ``Handle``, ``NegativeFeedback`` and
    ``PositiveFeedback``.

    Args:
        url: Profile URL handed to ``xmlTree_w_login``.
        userdb: Table name given to ``DBManager.insert_into_table``.
        debug: True prints the record; False inserts it into ``userdb``.

    Returns:
        None.  A falsy page results in no output and no insert.
    """
    bimm = mazda3()
    # NOTE(review): these literals appear to be a site key and login
    # credentials -- confirm; they should not be hard-coded in source.
    main_page = xmlTree_w_login(url, 'mazda', 'somebodyis', 'anybodyanybody')
    if not main_page:
        return

    def first_text(expression):
        # Text of the first node matching `expression`, or None when the
        # expression matches nothing.  Runs each XPath once, not twice.
        nodes = main_page.xpath(expression)
        if not nodes:
            return None
        return nodes[0].text_content()

    userdata = []

    location = first_text(bimm.location)
    if location is not None:
        userdata.append(('Location', location.strip()))

    car_make = first_text(bimm.car_make)
    if car_make is not None:
        # A missing year no longer raises IndexError; the make is stored
        # on its own in that case.
        car_year = first_text(bimm.car_year)
        if car_year is None:
            car_year = ''
        userdata.append(('Cars', car_make + car_year))

    total_posts = first_text(bimm.noposts)
    if total_posts is not None:
        userdata.append(('TotalPosts', bimm.total_posts(total_posts)))

    last_active = first_text(bimm.lastac)
    if last_active is not None:
        userdata.append(
            ('LastActivity',
             bimm.convert_to_valid_date(bimm.last_activity(last_active))))

    joined = first_text(bimm.joindate)
    if joined is not None:
        userdata.append(
            ('JoinDate',
             bimm.convert_to_valid_date(bimm.join_date(joined))))

    # get_handle is given the matched node itself, not its text content.
    handle_nodes = main_page.xpath(bimm.handle)
    if handle_nodes:
        userdata.append(('Handle', bimm.get_handle(handle_nodes[0])))

    negative = first_text(bimm.minus_fb)
    if negative is not None:
        userdata.append(('NegativeFeedback', negative.strip()))

    positive = first_text(bimm.plus_fb)
    if positive is not None:
        userdata.append(('PositiveFeedback', positive.strip()))

    if debug:
        print(userdata)
        return

    # The handle is opened right before the insert and closed in all
    # cases, so an error while scraping or inserting cannot leak it.
    udb = DBManager()
    try:
        udb.insert_into_table(userdb, userdata)
    finally:
        udb.close()