Python get_page_id Examples, scrapy_fatsecret.common_lib.get_page_id Python Examples

Example #1

0

Show file

File: buddies.py Project: thalesfc/scrapy-fatsecret

def parse_buddy(response):
    """Build a BuddyItem from a buddies page.

    The item receives the page id and user id resolved by ``common_lib``
    and a ``buddies`` list with one entry per ``<a class="member">`` link
    found directly under a ``<b>`` element.  Each entry is the list that
    ``extract()`` returns for the whitespace-normalized link text.

    Args:
        response: response object exposing ``xpath()``.

    Returns:
        The populated BuddyItem.
    """
    buddy_item = BuddyItem()
    buddy_item['id'] = common_lib.get_page_id(response)
    buddy_item['user_id'] = common_lib.get_user_id(response)

    member_names = []
    for member_link in response.xpath('//b/a[@class="member"]'):
        extracted = member_link.xpath('normalize-space(text())').extract()
        member_names.append(extracted)
    buddy_item['buddies'] = member_names

    return buddy_item

Example #2

0

Show file

File: calendar.py Project: thalesfc/scrapy-fatsecret

def parse_food_diary(response):
    """Scrape a food-diary page into a FoodDiary item.

    ``id``, ``user_id``, ``link``, ``date``, ``dishes`` and ``rdi`` are
    always set.  ``food`` (the fat/carbs/prot/cals totals) is only set when
    the summary table yields at least four cells; otherwise a warning is
    logged and the key is left unset.

    Dish rows that expose fewer than four ``td.normal`` cells are skipped
    with a warning.  For kept rows, ``name`` and the four nutrient values
    are stored as the raw lists returned by ``extract()``.

    Args:
        response: response object exposing ``xpath()`` and ``url``.

    Returns:
        The populated FoodDiary item.
    """
    item = FoodDiary()
    item['id'] = common_lib.get_page_id(response)
    item['user_id'] = common_lib.get_user_id(response)
    item['link'] = response.url
    item['date'] = response.xpath(
        'normalize-space(//div[@class="subtitle"]/text())').extract()

    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal, which would embed the source indentation in the
    # string itself.
    general_info = response.xpath(
        '//table[@class="foodsNutritionTbl"]'
        '//td[@class="sub"]/text()').extract()
    if len(general_info) < 4:
        logging.warning(
            "Could not find general food info on page %s", response.url)
    else:
        item['food'] = {
            'fat': general_info[0],
            'carbs': general_info[1],
            'prot': general_info[2],
            'cals': general_info[3],
        }

    dish_rows = response.xpath(
        '//table[@class="generic foodsNutritionTbl"]'
        '//tr[@valign="top"]')
    dishes = []
    for row in dish_rows:
        dish_name = row.xpath('.//b/text()').extract()
        dish_info = row.xpath('.//td[@class="normal"]')

        if len(dish_info) < 4:
            # A row without nutrient cells may also lack a name; indexing
            # dish_name unconditionally would raise IndexError here.
            logging.warning(
                "Could not find info for food %s @ url %s",
                dish_name[0] if dish_name else "<unnamed>",
                response.url)
            continue

        dishes.append({
            'name': dish_name,
            'fat': dish_info[0].xpath('text()').extract(),
            'carbs': dish_info[1].xpath('text()').extract(),
            'prot': dish_info[2].xpath('text()').extract(),
            'cals': dish_info[3].xpath('text()').extract(),
        })

    item['dishes'] = dishes

    item['rdi'] = response.xpath(
        'normalize-space(//div[@class="big"]/text())').extract()

    return item

Example #3

0

Show file

File: calendar.py Project: thalesfc/scrapy-fatsecret

def parse_exercise_diary(response):
    """Scrape an exercise-diary page into an ExerciseDiary item.

    ``id``, ``user_id``, ``link``, ``date`` and ``exercises`` are always
    set.  ``summary`` is only set when the summary table yields at least
    two cells; otherwise a warning is logged and the key is left unset.

    Each ``<tr>`` whose id starts with ``infsec`` becomes one entry in
    ``exercises`` with ``name``, ``time_spent`` and ``cals``.  A value that
    cannot be found on the row is stored as ``None`` (and a warning is
    logged) instead of raising ``IndexError`` and losing the whole item.

    Args:
        response: response object exposing ``xpath()`` and ``url``.

    Returns:
        The populated ExerciseDiary item.
    """
    item = ExerciseDiary()

    item['id'] = common_lib.get_page_id(response)
    item['user_id'] = common_lib.get_user_id(response)
    item['link'] = response.url
    item['date'] = response.xpath(
        'normalize-space(//div[@class="subtitle"]/text())').extract()

    summary_data = response.xpath(
        '//table[@class="generic activityValuesTbl"]'
        '//td[@class="sub"]/text()').extract()
    if len(summary_data) < 2:
        logging.warning(
            "Exercise summary data not found for page %s.", response.url)
    else:
        item['summary'] = {
            'time_spent': summary_data[0],
            # NOTE(review): key is spelled 'calc' (not 'cals'); kept as-is
            # because downstream consumers may rely on it.
            'calc': summary_data[1],
        }

    exercises = []
    for row in response.xpath('//tr[starts-with(@id, "infsec")]'):
        name = row.xpath('.//b/text()').extract()

        # The duration cell is looked up under three different class
        # values (the first one has a leading space inside the attribute);
        # the first lookup that yields text wins.
        time_spent = (
            row.xpath(
                './/div[@class=" activityCell bTop"]/text()').extract()
            or row.xpath(
                './/div[@class="activityCell bLeft bTop"]'
                '/a/b/text()').extract()
            or row.xpath(
                './/div[@class="activityCell bTop"]/text()').extract()
        )

        cals = row.xpath(
            './/div[@class="activityCell bTop bRight"]/text()').extract()

        if not (name and time_spent and cals):
            logging.warning(
                "Incomplete exercise row on page %s.", response.url)

        exercises.append({
            'name': name[0] if name else None,
            'time_spent': time_spent[0] if time_spent else None,
            'cals': cals[0] if cals else None,
        })
    item['exercises'] = exercises

    return item

Example #4

0

Show file

File: posts.py Project: thalesfc/scrapy-fatsecret

def parse_post(response):
    """Scrape a journal post page into a PostItem.

    Fills in the post metadata (``id``, ``link``, ``user_id``, ``date``),
    the post ``text``, the ``weight`` and ``diet`` summaries, the list of
    ``comments``, the ``food`` and ``exercise`` calendar entries, and
    ``likes``.

    ``likes`` comes from a separate blocking HTTP GET to the
    ``FeedSupporters.aspx`` endpoint.  The request uses a timeout; if it
    fails or returns an HTTP error status, a warning is logged and
    ``likes`` is set to an empty list so the rest of the item is kept.

    Args:
        response: response object exposing ``xpath()`` and ``url``.

    Returns:
        The populated PostItem.
    """
    # Function-scope imports: ``requests`` was already imported locally by
    # this function; ``logging`` is only needed on the failure path.
    import logging
    import requests

    item = PostItem()
    item['id'] = common_lib.get_page_id(response)
    item['link'] = response.url
    item['user_id'] = common_lib.get_user_id(response)
    item['date'] = response.xpath(
        '//div[@class="breadcrumb_noLink"]/text()').extract()

    # textual fields
    content = response.xpath('//table[@class="generic breakout"]')
    item['text'] = content.xpath(
        'normalize-space(tr/td/div[2]/text())').extract()
    item['weight'] = {
        'current': content.xpath(
            'tr/td/div[3]/table/tr/td[2]/span[1]/text()').extract(),
        'lost_sofar': content.xpath(
            'tr/td/div[3]/table/tr/td[2]/span[2]/b/text()').extract(),
    }

    diet_status = content.xpath(
        'normalize-space(tr/td/div[3]/table/tr/td[2]/text()[4])').extract()
    item['diet'] = {
        'status': diet_status[0].strip() if diet_status else None,
        'name': content.xpath(
            'normalize-space(tr/td/div[3]'
            '//div[@class="smallText"][2]/a/text())').extract(),
    }

    # comments
    comments = []
    for comment_xpath in response.xpath('//tr[@class="listrow"]/td'):
        byline = comment_xpath.xpath(
            'normalize-space(div[2]/text())').extract()[0]
        comments.append({
            'user_id': comment_xpath.xpath('div[2]/a/text()').extract(),
            # Only the first three whitespace-separated tokens of the
            # byline are kept as the date.
            'date': " ".join(byline.split()[:3]),
            'text': comment_xpath.xpath(
                'normalize-space(div[1]/text())').extract()[0],
        })
    item['comments'] = comments

    # calendar entry
    kcal = response.xpath(
        '//table[@class="generic"][3]//a[1]/text()').extract()
    food_status = response.xpath(
        'normalize-space(//table[@class="generic"]/tr'
        '/td[@class="smallText"][2]/text())').extract()
    food_info = None
    if food_status and food_status[0]:
        # Raw strings keep the regex escapes (\d, \., \|, \S) from being
        # treated as (invalid) string escape sequences.
        food_info = re.search(
            r"Fat: (\d+\.\d+\S+) \| "
            r"Prot: (\d+\.\d+\S+) \| "
            r"Carb: (\d+\.\d+\S+)\.",
            food_status[0])

    # U+00A0 (non-breaking space) written as a literal instead of
    # unichr(160), which only exists on Python 2.
    nbsp = u"\xa0"

    food_text = response.xpath(
        'normalize-space(//table[@class="generic breakout"]'
        '//tr[@valign="top"][1])').extract()
    item['food'] = {
        'calories': kcal[0] if kcal else None,
        'fat': food_info.group(1) if food_info else None,
        'prot': food_info.group(2) if food_info else None,
        'carb': food_info.group(3) if food_info else None,
        'text': food_text[0].replace(nbsp, " ") if food_text else None,
    }

    exercise_text = response.xpath(
        'normalize-space(//table[@class="generic breakout"]'
        '//tr[@valign="top"][2])').extract()
    item['exercise'] = {
        'calories': response.xpath(
            '//table[@class="generic"][4]/tr/td/a/text()').extract(),
        'text': (exercise_text[0].replace(nbsp, " ")
                 if exercise_text else None),
    }

    # likes
    base_url = 'http://www.fatsecret.com/ajax/FeedSupporters.aspx'
    params = {'id': item['id'], 'tid': '2'}
    try:
        # Without a timeout a stalled connection would block this callback
        # indefinitely.
        r = requests.get(base_url, params=params, timeout=10)
        r.raise_for_status()
    except requests.RequestException as exc:
        logging.warning(
            "Could not fetch likes for post %s: %s", response.url, exc)
        item['likes'] = []
    else:
        # r.text (decoded) rather than r.content (bytes): a str pattern
        # cannot be matched against bytes on Python 3.
        item['likes'] = re.findall(r'>(\S+)</a>', r.text)

    # TODO include html
    # item['html'] = response.body

    return item