def test_split_content(self):
    """Check that the raw minutes fixture splits into exactly two parts.

    Reads ``2011-1-19raw.txt`` from ``base_resources``, decodes it as
    UTF-8 and passes the text to ``split_minutes_content``.
    """
    fixture_path = '{}{}'.format(base_resources, '2011-1-19raw.txt')
    with open(fixture_path, 'r') as f:
        # NOTE(review): calling .decode() on the result of read() presumes
        # read() returns a byte string (Python 2 semantics) -- confirm
        # before running this under Python 3.
        text = f.read().decode('utf-8')

    # The decoded text is already one string, so it is passed on as-is;
    # re-joining its characters would only rebuild the same value.
    split_doc = split_minutes_content(text)
    self.assertEqual(len(split_doc), 2)
def run(self):
    """Write the first part of the split minutes to the output target.

    Reads the whole input target, decodes it as UTF-8, splits it with
    ``split_minutes_content`` and writes element ``0`` of the result,
    UTF-8 encoded, to the output target.
    """
    with self.input().open("r") as source:
        # NOTE(review): .decode() presumes read() returns a byte string
        # (Python 2 semantics) -- confirm for the target type in use.
        text = source.read().decode('utf8')

    # The decoded text is already one string; no re-joining is needed.
    split_doc = split_minutes_content(text)

    with self.output().open("w") as sink:
        sink.write(split_doc[0].encode("utf8"))
def run(self):
    """Write the second part of the split minutes with newlines removed.

    Reads the whole input target, decodes it as UTF-8, splits it with
    ``split_minutes_content``, deletes every ``\\n`` character from
    element ``1`` of the result and writes that text, UTF-8 encoded, to
    the output target.
    """
    with self.input().open("r") as source:
        # NOTE(review): .decode() presumes read() returns a byte string
        # (Python 2 semantics) -- confirm for the target type in use.
        text = source.read().decode('utf8')

    # The decoded text is already one string; no re-joining is needed.
    split_doc = split_minutes_content(text)

    # Mapping a code point to None makes translate() delete that character.
    newline_removal = {ord(u"\n"): None}
    sp_text = split_doc[1].translate(newline_removal)

    with self.output().open("w") as sink:
        sink.write(sp_text.encode('utf8'))
def test_split_discussion(self):
    """Check that the second part of the fixture yields many statements.

    Splits the ``2011-1-19raw.txt`` fixture with
    ``split_minutes_content``, removes newlines from element ``1`` of the
    result, feeds it to ``split_statements_via_colon`` and expects more
    than 100 statements back. The first statements (at most 50) are
    printed to help inspect the result.
    """
    fixture_path = '{}{}'.format(base_resources, '2011-1-19raw.txt')
    with open(fixture_path, mode='r') as f:
        # NOTE(review): calling .decode() on the result of read() presumes
        # read() returns a byte string (Python 2 semantics) -- confirm
        # before running this under Python 3.
        text = f.read().decode('utf8')

    # The decoded text is already one string; no re-joining is needed.
    split_doc = split_minutes_content(text)
    self.assertEqual(len(split_doc), 2)

    # Mapping a code point to None makes translate() delete that character.
    newline_removal = {ord(u"\n"): None}
    sp_text = split_doc[1].translate(newline_removal)

    convos = split_statements_via_colon(sp_text)

    # Slice instead of indexing 0..49 so that a short result reaches the
    # assertion below rather than failing early with an IndexError.
    for statement in convos[:50]:
        print(unicode(statement))

    self.assertGreater(len(convos), 100)