def get_debate_with_url(debate_url):
    """Scrape one debate page and build a Debate object from it.

    :param debate_url: path of the debate page; appended to the module-level
        ``root`` URL prefix before fetching.
    :return: a ``Debate`` populated with metadata and up to four ``_Round``
        entries (rounds missing from the page are skipped).
    """
    debate = BeautifulSoup(urllib.urlopen(root + debate_url).read(),
                           'lxml').find('div', id='debate')
    title = debate.find('h1', class_='top').get_text()
    pro_member = debate.find('div', id='instigatorWrap').find('div', class_='un').get_text()
    con_member = debate.find('div', id='contenderWrap').find('div', class_='un').get_text()

    # The parameters table lays out "Label:"/value pairs in two column pairs
    # (c1/c2 and c3/c4); rows missing any of those cells raise AttributeError
    # on the .get_text() of a None find() result and are skipped.
    params_table = debate.find('table', id='parameters')
    params_dict = {}
    for row in params_table.find_all('tr'):
        try:
            params_dict[row.find('td', class_='c1').get_text().replace(':', '')] = \
                row.find('td', class_='c2').get_text()
            params_dict[row.find('td', class_='c3').get_text().replace(':', '')] = \
                row.find('td', class_='c4').get_text()
        except AttributeError:
            continue

    debate_obj = Debate(title=title,
                        link=debate_url,
                        debate_no=params_dict['Debate No'],
                        category=params_dict['Category'],
                        pro_member=pro_member,
                        con_member=con_member,
                        started=params_dict['Started'],
                        viewed=params_dict['Viewed'])

    round_soup = debate.find('table', id='rounds')
    # Compile the marker regex once instead of on every loop iteration, and
    # use a raw string so the pattern text is explicit.
    pro_marker = re.compile(r'(\n)*Pro')
    for round_num in range(1, 5):
        try:
            metadict = {}
            pro_con_data = round_soup.find('tr', id='round' + str(round_num)) \
                                     .find_all('div', class_='round-inner')
            # The two divs are not in a guaranteed order; the side whose text
            # starts with "Pro" (possibly after leading newlines) is Pro's.
            if pro_marker.match(pro_con_data[0].get_text()) is not None:
                metadict['pro'] = pro_con_data[0].get_text()
                metadict['con'] = pro_con_data[1].get_text()
            else:
                metadict['pro'] = pro_con_data[1].get_text()
                metadict['con'] = pro_con_data[0].get_text()
            debate_obj.add_round(_Round(con_data=metadict['con'],
                                        pro_data=metadict['pro']))
        except AttributeError:
            # Round row absent entirely (e.g. the debate ended early).
            continue
        except IndexError:
            # Round row present but without both sides' content divs.
            continue
    return debate_obj
def read_debates_from_file(file_name): """ Reads objects from files :param file_name: :return: """ debates = set() with open(file_name, 'r') as input_file: data = input_file.read() for json_obj in data.split('\n'): try: debates.add(Debate.load_from_json(json.loads(json_obj))) except ValueError: print json_obj return debates