Esempio n. 1
0
 def update_questions(self):
     """
     Update self.questions with ProfileQuestion instances.

     For each question category, the profile's questions page is
     requested page by page (the `low` form field advances by ten per
     page) until the page has no 'Next' link carrying an href. Every
     div whose id matches `question_<number>` is parsed into a
     ProfileQuestion built from the question text, the answer values,
     the displayed answer, its explanation, this object, the category,
     and the `you_approve` / `they_approve` flags. Each flag is False
     when the corresponding span has the class 'not_accepted', True
     when an answer is displayed, and None otherwise. Questions whose
     text is already present in self.questions are not added again.

     Like User.update_questions(), note that this can take a while due
     to OKCupid displaying only ten questions on each page, potentially
     requiring a large number of requests to the server.
     """
     question_id = re.compile(r'question_(\d+)')
     url = 'http://www.okcupid.com/profile/{0}/questions'.format(self.name)
     # Texts already stored, kept as a set so the duplicate check does
     # not rescan self.questions for every parsed question.
     seen_texts = {q.text for q in self.questions}
     for category in ['Ethics', 'Sex', 'Religion', 'Lifestyles', 'Dating', 'Other']:
         # The page offset is per category: restart it here, otherwise
         # every category after the first would begin at the offset the
         # previous one finished on and skip its leading pages.
         count = 0
         keep_going = True
         while keep_going:
             questions_data = {
                 'low': 1 + 10*count,
                 category: '1',
                 }
             questions_request = self._session.post(url, data=questions_data)
             tree = html.fromstring(questions_request.content.decode('utf8'))
             for div in tree.iter('div'):
                 id_match = question_id.match(div.attrib.get('id', ''))
                 if not id_match:
                     continue
                 number = id_match.group(1)
                 text = helpers.replace_chars(div.xpath(".//p[@class = 'qtext']")[0].text)
                 answer_eles = div.xpath(".//input[contains(@id,'question_{0}_qans')]".format(number))
                 answers = [ele.attrib['value'] for ele in answer_eles]
                 user_answer_ele = div.xpath(".//span[@id = 'answer_viewer_{0}']".format(number))[0]
                 # An empty span has .text == None; treat it as no answer
                 # instead of failing on None.strip().
                 user_answer = (user_answer_ele.text or '').strip()
                 they_approve = None
                 if user_answer_ele.attrib.get('class') == 'not_accepted':
                     they_approve = False
                 elif user_answer:
                     they_approve = True
                 answer_target = div.xpath(".//span[@id = 'answer_target_{0}']".format(number))[0]
                 you_approve = None
                 if answer_target.attrib.get('class') == 'not_accepted':
                     you_approve = False
                 elif user_answer:
                     you_approve = True
                 note = div.xpath(".//span[@id = 'note_target_{0}']".format(number))[0].text
                 if note is None:
                     explanation = ''
                 else:
                     explanation = helpers.replace_chars(note.strip())
                 if text not in seen_texts:
                     seen_texts.add(text)
                     self.questions.append(ProfileQuestion(text,
                                                           answers,
                                                           user_answer,
                                                           explanation,
                                                           self,
                                                           category,
                                                           you_approve,
                                                           they_approve))
             # Only continue when the page offers a usable 'Next' link.
             next_links = tree.xpath("//a[text() = 'Next']")
             if next_links and 'href' in next_links[0].attrib:
                 count += 1
             else:
                 keep_going = False
Esempio n. 2
0
 def update_questions(self):
     """
     Update self.questions with Question instances, which are built
     from the question text, the displayed answer and its explanation.

     The profile's questions page is requested repeatedly, with the
     `low` form field set to one more than the number of questions
     parsed so far, until a page has no `li` element of class 'next'
     (or yields no questions at all). Only divs whose id is
     `question_` followed by digits are treated as questions.

     Like User.update_questions(), note that this can take a while due
     to OKCupid displaying only ten questions on each page, potentially
     requiring a large number of requests to the server.
     """
     keep_going = True
     question_number = 0
     while keep_going:
         questions_data = {
             'low': 1 + question_number,
             }
         get_questions = self._session.post(
             'http://www.okcupid.com/profile/{0}/questions'.format(self.name),
             data=questions_data)
         tree = html.fromstring(get_questions.content.decode('utf8'))
         next_wrapper = tree.xpath("//li[@class = 'next']")
         # Build a filtered list rather than calling remove() while
         # iterating: removing during iteration skips the element that
         # follows each removed one, so non-question divs could slip by.
         # 'question_' is nine characters, hence the [9:] slice.
         question_wrappers = [
             div for div in tree.xpath("//div[contains(@id, 'question_')]")
             if div.attrib['id'][9:].isdigit()
         ]
         for div in question_wrappers:
             question_number += 1
             explanation = ''
             text = helpers.replace_chars(div.xpath(".//div[@class = 'qtext']/p/text()")[0])
             user_answer = div.xpath(".//span[contains(@id, 'answer_target_')]/text()")[0].strip()
             explanation_span = div.xpath(".//span[@class = 'note']")
             # A missing or empty note span simply means no explanation.
             if explanation_span and explanation_span[0].text is not None:
                 explanation = explanation_span[0].text.strip()
             self.questions.append(Question(text, user_answer, explanation))
         # Stop on the last page. Also stop when a page produced no
         # questions: `low` would not advance, so the same request
         # would otherwise be repeated forever.
         if not next_wrapper or not question_wrappers:
             keep_going = False
Esempio n. 3
0
 def update_questions(self):
     """
     Update `self.questions` with a sequence of question objects,
     whose properties can be found in objects.py.

     The questions page for `self.username` is requested repeatedly,
     with the `low` form field set to one more than the number of
     questions parsed so far, until a page has no `li` element of
     class 'next' (or yields no questions at all). Each question is
     stored as Question(text, user_answer, explanation), where the
     explanation defaults to an empty string.

     Note that this can take a while due to OKCupid displaying only
     ten questions on each page, potentially requiring a large number
     of requests.
     """
     keep_going = True
     question_number = 0
     while keep_going:
         questions_data = {
             'low': 1 + question_number,
             }
         get_questions = self._session.post(
             'http://www.okcupid.com/profile/{0}/questions'.format(self.username),
             data=questions_data)
         tree = html.fromstring(get_questions.content.decode('utf8'))
         next_wrapper = tree.xpath("//li[@class = 'next']")
         # Get a list of each question div wrapper, ignore the first because it's an unanswered question
         candidates = tree.xpath("//div[contains(@id, 'question_')]")[1:]
         # Filter into a new list rather than calling remove() while
         # iterating: removing during iteration skips the element that
         # follows each removed one, so non-question divs could slip by.
         # 'question_' is nine characters, hence the [9:] slice.
         question_wrappers = [
             div for div in candidates
             if div.attrib['id'][9:].isdigit()
         ]
         for div in question_wrappers:
             question_number += 1
             explanation = ''
             text = helpers.replace_chars(div.xpath(".//div[@class = 'qtext']/p/text()")[0])
             user_answer = div.xpath(".//li[contains(@class, 'mine')]/text()")[0]
             explanation_p = div.xpath(".//p[@class = 'value']")
             # A missing or empty paragraph simply means no explanation.
             if explanation_p and explanation_p[0].text is not None:
                 explanation = explanation_p[0].text
             self.questions.append(Question(text, user_answer, explanation))
         # Stop on the last page. Also stop when a page produced no
         # questions: `low` would not advance, so the same request
         # would otherwise be repeated forever.
         if not next_wrapper or not question_wrappers:
             keep_going = False
Esempio n. 4
0
 def update_questions(self):
     """
     Update `self.questions` with a sequence of question objects,
     whose properties can be found in objects.py.

     The questions page for `self.username` is requested page by page
     (the `low` form field advances by ten per page) until the page has
     no 'Next' link carrying an href. Every div whose id matches
     `question_<number>` is parsed into a UserQuestion built from the
     question text, a dict of answer value -> answer text, the chosen
     answer, the explanation, this object, the list of acceptable
     answers and an importance label (None if the importance value is
     not one of '1'..'5'). The very first question div encountered is
     never stored, and neither is a question whose text is already in
     `self.questions`.

     Note that this can take a while due to OKCupid displaying only
     ten questions on each page, potentially requiring a large number
     of requests.
     """
     importance_labels = {
         '5': 'Irrelevant',
         '4': 'A little important',
         '3': 'Somewhat important',
         '2': 'Very important',
         '1': 'Mandatory',
     }
     question_id = re.compile(r'question_(\d+)')
     answer_id = re.compile(r'self_answers_\d+_(\d+)')
     url = 'http://www.okcupid.com/profile/{0}/questions'.format(self.username)
     # Texts already stored, kept as a set so the duplicate check does
     # not rescan self.questions for every parsed question.
     seen_texts = {q.text for q in self.questions}
     count = 0
     question_number = 0
     keep_going = True
     while keep_going:
         questions_data = {
             'low': 1 + 10*count,
             }
         get_questions = self._session.post(url, data=questions_data)
         tree = html.fromstring(get_questions.content.decode('utf8'))
         for div in tree.iter('div'):
             id_match = question_id.match(div.attrib.get('id', ''))
             if not id_match:
                 continue
             question_number += 1
             explanation = ''
             number = id_match.group(1)
             text = helpers.replace_chars(div.xpath(".//p[@class = 'qtext']")[0].text)
             answer_eles = div.xpath(".//li")
             answers = {}
             # Use a dictionary/regex for the answer values
             # because occasionally the numbers are not sequential
             for ele in answer_eles:
                 value_match = answer_id.match(ele.attrib.get('id', ''))
                 if value_match:
                     answers[value_match.group(1)] = ele.text
             # .get() so an <li> without a class attribute is simply
             # not an acceptable answer instead of raising KeyError.
             acceptable_answers = [ele.text for ele in answer_eles
                                   if ele.attrib.get('class') in (' match', 'mine match')]
             importance_no = div.xpath(".//input[@id = 'question_{0}_importance']/@value".format(number))[0]
             # Looked up fresh for every question, so an unexpected value
             # gives None rather than an unbound name or the label left
             # over from the previous question.
             importance = importance_labels.get(importance_no)
             explanation_p = div.xpath(".//p[@class = 'explanation']")
             if explanation_p and explanation_p[0].text is not None:
                 explanation = explanation_p[0].text
             answer_int = int(div.xpath(".//input[@id = 'question_{0}_answer']/@value".format(number))[0])
             # question_number > 1 skips the first question div seen
             # (presumably the unanswered question at the top - confirm).
             if question_number > 1 and text not in seen_texts:
                 user_answer = answers[str(answer_int)]
                 seen_texts.add(text)
                 self.questions.append(UserQuestion(text, answers, user_answer, explanation, self, acceptable_answers, importance))
         # Only continue when the page offers a usable 'Next' link.
         next_links = tree.xpath("//a[text() = 'Next']")
         if next_links and 'href' in next_links[0].attrib:
             count += 1
         else:
             keep_going = False
Esempio n. 5
0
 def update_questions(self):
     """
     Update self.questions with ProfileQuestion instances.

     For each question category, the profile's questions page is
     requested page by page (the `low` form field advances by ten per
     page) until the page has no 'Next' link carrying an href. Every
     div whose id matches `question_<number>` is parsed into a
     ProfileQuestion built from the question text, the answer values,
     the displayed answer, its explanation, this object, the category,
     and the `you_approve` / `they_approve` flags. Each flag is False
     when the corresponding span has the class 'not_accepted', True
     when an answer is displayed, and None otherwise. Questions whose
     text is already present in self.questions are not added again.

     Like User.update_questions(), note that this can take a while due
     to OKCupid displaying only ten questions on each page, potentially
     requiring a large number of requests to the server.
     """
     question_id = re.compile(r'question_(\d+)')
     url = 'http://www.okcupid.com/profile/{0}/questions'.format(
         self.name)
     # Texts already stored, kept as a set so the duplicate check does
     # not rescan self.questions for every parsed question.
     seen_texts = {q.text for q in self.questions}
     for category in [
             'Ethics', 'Sex', 'Religion', 'Lifestyles', 'Dating', 'Other'
     ]:
         # The page offset is per category: restart it here, otherwise
         # every category after the first would begin at the offset the
         # previous one finished on and skip its leading pages.
         count = 0
         keep_going = True
         while keep_going:
             questions_data = {
                 'low': 1 + 10 * count,
                 category: '1',
             }
             questions_request = self._session.post(
                 url, data=questions_data)
             tree = html.fromstring(
                 questions_request.content.decode('utf8'))
             for div in tree.iter('div'):
                 id_match = question_id.match(div.attrib.get('id', ''))
                 if not id_match:
                     continue
                 number = id_match.group(1)
                 text = helpers.replace_chars(
                     div.xpath(".//p[@class = 'qtext']")[0].text)
                 answer_eles = div.xpath(
                     ".//input[contains(@id,'question_{0}_qans')]".
                     format(number))
                 answers = [ele.attrib['value'] for ele in answer_eles]
                 user_answer_ele = div.xpath(
                     ".//span[@id = 'answer_viewer_{0}']".format(
                         number))[0]
                 # An empty span has .text == None; treat it as no answer
                 # instead of failing on None.strip().
                 user_answer = (user_answer_ele.text or '').strip()
                 they_approve = None
                 if user_answer_ele.attrib.get('class') == 'not_accepted':
                     they_approve = False
                 elif user_answer:
                     they_approve = True
                 answer_target = div.xpath(
                     ".//span[@id = 'answer_target_{0}']".format(
                         number))[0]
                 you_approve = None
                 if answer_target.attrib.get('class') == 'not_accepted':
                     you_approve = False
                 elif user_answer:
                     you_approve = True
                 note = div.xpath(
                     ".//span[@id = 'note_target_{0}']".format(
                         number))[0].text
                 if note is None:
                     explanation = ''
                 else:
                     explanation = helpers.replace_chars(note.strip())
                 if text not in seen_texts:
                     seen_texts.add(text)
                     self.questions.append(
                         ProfileQuestion(text, answers, user_answer,
                                         explanation, self, category,
                                         you_approve, they_approve))
             # Only continue when the page offers a usable 'Next' link.
             next_links = tree.xpath("//a[text() = 'Next']")
             if next_links and 'href' in next_links[0].attrib:
                 count += 1
             else:
                 keep_going = False
Esempio n. 6
0
 def update_questions(self):
     """
     Update `self.questions` with a sequence of question objects,
     whose properties can be found in objects.py.

     The questions page for `self.username` is requested page by page
     (the `low` form field advances by ten per page) until the page has
     no 'Next' link carrying an href. Every div whose id matches
     `question_<number>` is parsed into a UserQuestion built from the
     question text, a dict of answer value -> answer text, the chosen
     answer, the explanation, this object, the list of acceptable
     answers and an importance label (None if the importance value is
     not one of '1'..'5'). The very first question div encountered is
     never stored, and neither is a question whose text is already in
     `self.questions`.

     Note that this can take a while due to OKCupid displaying only
     ten questions on each page, potentially requiring a large number
     of requests.
     """
     importance_labels = {
         '5': 'Irrelevant',
         '4': 'A little important',
         '3': 'Somewhat important',
         '2': 'Very important',
         '1': 'Mandatory',
     }
     question_id = re.compile(r'question_(\d+)')
     answer_id = re.compile(r'self_answers_\d+_(\d+)')
     url = 'http://www.okcupid.com/profile/{0}/questions'.format(
         self.username)
     # Texts already stored, kept as a set so the duplicate check does
     # not rescan self.questions for every parsed question.
     seen_texts = {q.text for q in self.questions}
     count = 0
     question_number = 0
     keep_going = True
     while keep_going:
         questions_data = {
             'low': 1 + 10 * count,
         }
         get_questions = self._session.post(url, data=questions_data)
         tree = html.fromstring(get_questions.content.decode('utf8'))
         for div in tree.iter('div'):
             id_match = question_id.match(div.attrib.get('id', ''))
             if not id_match:
                 continue
             question_number += 1
             explanation = ''
             number = id_match.group(1)
             text = helpers.replace_chars(
                 div.xpath(".//p[@class = 'qtext']")[0].text)
             answer_eles = div.xpath(".//li")
             answers = {}
             # Use a dictionary/regex for the answer values
             # because occasionally the numbers are not sequential
             for ele in answer_eles:
                 value_match = answer_id.match(ele.attrib.get('id', ''))
                 if value_match:
                     answers[value_match.group(1)] = ele.text
             # .get() so an <li> without a class attribute is simply
             # not an acceptable answer instead of raising KeyError.
             acceptable_answers = [
                 ele.text for ele in answer_eles
                 if ele.attrib.get('class') in (' match', 'mine match')
             ]
             importance_no = div.xpath(
                 ".//input[@id = 'question_{0}_importance']/@value".
                 format(number))[0]
             # Looked up fresh for every question, so an unexpected value
             # gives None rather than an unbound name or the label left
             # over from the previous question.
             importance = importance_labels.get(importance_no)
             explanation_p = div.xpath(".//p[@class = 'explanation']")
             if explanation_p and explanation_p[0].text is not None:
                 explanation = explanation_p[0].text
             answer_int = int(
                 div.xpath(
                     ".//input[@id = 'question_{0}_answer']/@value".
                     format(number))[0])
             # question_number > 1 skips the first question div seen
             # (presumably the unanswered question at the top - confirm).
             if question_number > 1 and text not in seen_texts:
                 user_answer = answers[str(answer_int)]
                 seen_texts.add(text)
                 self.questions.append(
                     UserQuestion(text, answers, user_answer,
                                  explanation, self, acceptable_answers,
                                  importance))
         # Only continue when the page offers a usable 'Next' link.
         next_links = tree.xpath("//a[text() = 'Next']")
         if next_links and 'href' in next_links[0].attrib:
             count += 1
         else:
             keep_going = False