def test_date_tags(self):
    """Check that ``retrieve_date_tags`` resolves each sample message to a date.

    Every case is a ``(message, expected)`` pair.  A ``CaronaPost`` is built
    from the message, its ``creation_date`` is then assigned explicitly to
    2013-10-02, and after ``retrieve_date_tags()`` succeeds the post's
    ``tag_date`` must equal ``expected``.
    """
    created_time = '2013-10-10T00:11:02+0000'
    # creation_date is overwritten with this value on every post, so the
    # expected dates below are relative to it, not to created_time.
    reference_date = datetime.datetime(2013, 10, 2)

    cases = [
        (u' sao paulo hoje, dia 27 ', datetime.datetime(2013, 10, 27)),
        (u' 4a feira', datetime.datetime(2013, 10, 2)),
        (u' terça-feira,', datetime.datetime(2013, 10, 8)),
        (u' quarta-feira,', datetime.datetime(2013, 10, 2)),
        (u' 10 de outubro', datetime.datetime(2013, 10, 10)),
        (u' 15 outubro', datetime.datetime(2013, 10, 15)),
        (u' 20/10', datetime.datetime(2013, 10, 20)),
        (u'Sexta, dia 04 outubro ', datetime.datetime(2013, 10, 4)),
        (u'04/Out (Sexta-feira)', datetime.datetime(2013, 10, 4)),
        (u'QUINTA 03/10', datetime.datetime(2013, 10, 3)),
        (u'sexta 04/10/2013', datetime.datetime(2013, 10, 4)),
        (u'sexta feira(04/10)', datetime.datetime(2013, 10, 4)),
        (u'sexta, dia 4, 12:00.', datetime.datetime(2013, 10, 4)),
        (u'na sexta, 4', datetime.datetime(2013, 10, 4)),
        (u' sexta, 04,', datetime.datetime(2013, 10, 4)),
        (u' 6a feira', datetime.datetime(2013, 10, 4)),
        (u'SEXTA FEIRA DIA 4', datetime.datetime(2013, 10, 4)),
        (u'amanha', datetime.datetime(2013, 10, 3)),
        (u'sexta após às 18:00 ou sábado de manha',
         datetime.datetime(2013, 10, 4)),
        (u'sexta a noite ou sábado', datetime.datetime(2013, 10, 4)),
        (u'sexta-feira, apos as 22h30 ou sabado o mais cedo possivel!',
         datetime.datetime(2013, 10, 4)),
    ]

    for message, expected in cases:
        post = CaronaPost({
            'message': message,
            'created_time': created_time,
            'from': {'id': '1'},
        })
        post.creation_date = reference_date

        # %r keeps the failure text ASCII-safe for the accented messages.
        self.assertTrue(post.retrieve_date_tags(),
                        'retrieve date tags for %r' % (message,))
        print(post.content_clean)
        print(post.tag_date)
        self.assertEqual(post.tag_date, expected,
                         'retrieve correct tag date for %r' % (message,))
def test_entire_post(self):
    """Run the full tag-extraction pipeline over sample posts.

    For each fixture a ``CaronaPost`` is built from ``fixture['post']``, the
    two city configurations are assigned to it, and then the date, time,
    (optional) vagas, ofereco/procuro and origin/destiny retrievers are
    called in that order.  Each retriever must return a truthy value and
    the resulting ``tag_*`` attributes must equal the fixture's expected
    values.  The vagas check only runs for fixtures that have a ``'vagas'``
    key.
    """
    def make_post(message, created_time='2013-10-10T00:11:02+0000'):
        # Same raw-post shape for every fixture; only text and time vary.
        return {
            'message': message,
            'created_time': created_time,
            'from': {'id': '1'},
        }

    example_posts = [
        {
            'post': make_post(
                'Minha amiga OFERECE carona SP -> SC.\n'
                'Dia: 10/10 (Quinta Feira)\n'
                'Hora: 11:30\n'
                'Vagas: 2\n'
                'Pega: Metro Barra Funda\n'
                'Deixa: Em casa.\n'
                'Preço: R$ 30,00\n'
            ),
            'tag_time': datetime.datetime(2013, 10, 10, 11, 30),
            'tag_time_to': datetime.datetime(2013, 10, 10, 12, 30),
            'vagas': 2,
            'ofereco_procuro': 'oferecer',
            'origin': 'sao paulo/SP',
            'destiny': 'sao carlos/SP',
        },
        {
            'post': make_post(
                'preciso de carona de sao carlos pra sao paulo '
                'tel 98106 9357 tim valeu'
            ),
            'tag_time': datetime.datetime(2013, 10, 10, 6, 0),
            'tag_time_to': datetime.datetime(2013, 10, 10, 23, 59),
            'ofereco_procuro': 'procurar',
            'origin': 'sao carlos/SP',
            'destiny': 'sao paulo/SP',
        },
        {
            'post': make_post(
                'ofereco carona de sao paulo --->>>> sao carlos dia 08/11 (sexta feira) '
                'saio as 16:00 horas , do metro trianom masp r$30,00 - com 3 no carro '
                'contato face ou (16)9 9734-3553 - tim'
            ),
            'tag_time': datetime.datetime(2013, 11, 8, 16, 0),
            'tag_time_to': datetime.datetime(2013, 11, 8, 17, 0),
            'ofereco_procuro': 'oferecer',
            'origin': 'sao paulo/SP',
            'destiny': 'sao carlos/SP',
        },
        {
            'post': make_post(
                'ofereco : sao paulo ----> sao carlos domingo (10/11) as 16:00 no metro '
                'vila madalena , linha verde . deixo em casa em sao carlos 4 pessoas '
                'no carro valor : 30 reais tratar por inbox'
            ),
            'tag_time': datetime.datetime(2013, 11, 10, 16, 0),
            'tag_time_to': datetime.datetime(2013, 11, 10, 17, 0),
            'ofereco_procuro': 'oferecer',
            'origin': 'sao paulo/SP',
            'destiny': 'sao carlos/SP',
        },
        {
            'post': make_post(
                'procuro sao carlos------->sao paulo amanha 08/11 saindo entre '
                '12h30 e 16h 2 vagas, contato inbox ou (11)963392565'
            ),
            'tag_time': datetime.datetime(2013, 11, 8, 12, 0),
            'tag_time_to': datetime.datetime(2013, 11, 8, 16, 0),
            'vagas': 2,
            'ofereco_procuro': 'procurar',
            'origin': 'sao carlos/SP',
            'destiny': 'sao paulo/SP',
        },
        {
            'post': make_post(
                'ofereco sao carlos -> sao paulo amanha (08/11) as 18h '
                'deixo na rodoviaria do tiete contato inbox'
            ),
            'tag_time': datetime.datetime(2013, 11, 8, 18, 0),
            'tag_time_to': datetime.datetime(2013, 11, 8, 19, 0),
            'ofereco_procuro': 'oferecer',
            'origin': 'sao carlos/SP',
            'destiny': 'sao paulo/SP',
        },
        {
            'post': make_post(
                'ofereco carona sao carlos --------- sao paulo amanha as 10 '
                'da manha sexta dia 08 deixo no metro carrao',
                '2013-11-05T00:11:02+0000'
            ),
            'tag_time': datetime.datetime(2013, 11, 8, 10, 0),
            'tag_time_to': datetime.datetime(2013, 11, 8, 11, 0),
            'ofereco_procuro': 'oferecer',
            'origin': 'sao carlos/SP',
            'destiny': 'sao paulo/SP',
        },
        {
            'post': make_post(
                'procuro sampa a sanca na quinta feira 14.11 a tarde duas vagas !!!!',
                '2013-11-05T00:11:02+0000'
            ),
            'tag_time': datetime.datetime(2013, 11, 14, 12, 0),
            'tag_time_to': datetime.datetime(2013, 11, 14, 18, 0),
            'ofereco_procuro': 'procurar',
            'origin': 'sao paulo/SP',
            'destiny': 'sao carlos/SP',
        },
        {
            'post': make_post(
                'ofereco carona de sao paulo (tiete) para sao carlos. '
                'sexta dia 08/11 as 19hrs. tel. 17_9 91870878 vlw',
                '2013-11-05T00:11:02+0000'
            ),
            'tag_time': datetime.datetime(2013, 11, 8, 19, 0),
            'tag_time_to': datetime.datetime(2013, 11, 8, 20, 0),
            'ofereco_procuro': 'oferecer',
            'origin': 'sao paulo/SP',
            'destiny': 'sao carlos/SP',
        },
        {
            'post': make_post(
                'procuro carona de: sao carlos, sp, br ate: sao paulo, sp, br dia '
                '10/11/2013 (domingo). chegando pelas 13hs (1h a mais 1h a '
                'menos seria botimo) em sampa. valeu. use https://caronas.co',
                '2013-11-05T00:11:02+0000'
            ),
            'tag_time': datetime.datetime(2013, 11, 10, 13, 0),
            'tag_time_to': datetime.datetime(2013, 11, 10, 14, 0),
            'ofereco_procuro': 'procurar',
            'origin': 'sao carlos/SP',
            'destiny': 'sao paulo/SP',
        },
        {
            'post': make_post(
                'ofereco carona hoje sexta-feira as 14 horas!!! '
                'sao paulo pra sao carlos!!! ponto de encontro paulista com a '
                'rua frei caneca em frente ao banco itau em sanca deixo em casa '
                'valor 30 reais!!! 016 99786 6449',
                '2013-11-08T00:11:02+0000'
            ),
            'tag_time': datetime.datetime(2013, 11, 8, 14, 0),
            'tag_time_to': datetime.datetime(2013, 11, 8, 15, 0),
            'ofereco_procuro': 'oferecer',
            'origin': 'sao paulo/SP',
            'destiny': 'sao carlos/SP',
        },
        {
            'post': make_post(
                'so tenho mais 1 vaga carona sao carlos para sao paulo amanha '
                'domingo 20:00h. saio rodoviaria sao carlos, ao lado da padaria da '
                'esquina de frente para a saida dos onibus. deixo metro butanta resta '
                '1 vaga. r$ 30,00. so que estou com acesso restrito ao fb. '
                '(11) 98311-5595 (tim). (11) 9 9499-2776 claro e (11) 7741-6683 '
                'abs mario erba',
                '2013-11-09T00:11:02+0000'
            ),
            'tag_time': datetime.datetime(2013, 11, 10, 20, 0),
            'tag_time_to': datetime.datetime(2013, 11, 10, 21, 0),
            'ofereco_procuro': 'oferecer',
            'origin': 'sao carlos/SP',
            'destiny': 'sao paulo/SP',
        },
        {
            'post': make_post(
                u"""Procuro carona! Quinta-Feira, dia 10/10. São Paulo -> São Carlos Depois do almoço !"""
            ),
            'tag_time': datetime.datetime(2013, 10, 10, 12, 0),
            'tag_time_to': datetime.datetime(2013, 10, 10, 18, 0),
            'ofereco_procuro': 'procurar',
            'origin': 'sao paulo/SP',
            'destiny': 'sao carlos/SP',
        },
    ]

    # Backslashes are doubled so the regex fragments (\s, \() reach the
    # consumer unchanged without relying on unrecognised string escapes.
    cities = [
        [u'sao paulo', u'Sanpa', u'Sampa', u'Sao\\s*Paulo', u'SP',
         u'sao paulo\\s?\\(.*?\\)', 'sao paulo, sp, br'],
        [u'sao carlos', u'Sanca', u'Samca', u'Sao\\s*Carlos', u'SC',
         'sao carlos, sp, br'],
    ]

    for p in example_posts:
        post = CaronaPost(p['post'])

        ## settings cities
        post.city1_list = cities[0]
        post.city1 = cities[0][0]
        post.city1_state = 'SP'
        post.city2_state = 'SP'
        post.city2 = cities[1][0]
        post.city2_list = cities[1]

        ## datetime
        print(post.content_clean)
        self.assertTrue(post.retrieve_date_tags(), 'retrieve date tags')
        print(post.tag_date)
        self.assertTrue(post.retrieve_time_tags(),
                        'retrieve time tags ' + str(post.tag_time))
        # Single pre-formatted argument so print() emits the same text
        # whether it is the Python 2 statement or the function.
        print('time %s %s' % (post.tag_time, p['tag_time']))
        print('time_to %s %s' % (post.tag_time_to, p['tag_time_to']))
        self.assertEqual(post.tag_time, p['tag_time'], 'retrieve time tag')
        self.assertEqual(post.tag_time_to, p['tag_time_to'],
                         'retrieve time_to tag')

        ## vagas
        if 'vagas' in p:
            self.assertTrue(post.retrieve_vagas(), 'retrieve vagas')
            print(post.tag_num_vagas)
            self.assertEqual(post.tag_num_vagas, p['vagas'],
                             'retrieve vagas tag')

        ## ofereco / procuro
        self.assertTrue(post.retrieve_ofereco_procuro_tag(), 'ofereco/procuro')
        print(post.tag_ofereco_procuro)
        self.assertEqual(post.tag_ofereco_procuro, p['ofereco_procuro'],
                         'retrieve ofereco/procuro tags')

        ## origin / destiny
        self.assertTrue(post.retrieve_origin_destiny(), 'origin/destiny tags')
        print('%s --> %s' % (post.tag_origin, post.tag_destiny))
        print('%s %s' % (post.tag_origin, p['origin']))
        print('%s %s' % (post.tag_destiny, p['destiny']))
        self.assertEqual(post.tag_origin, p['origin'], 'origin tags')
        self.assertEqual(post.tag_destiny, p['destiny'], 'destiny tags')
def retrieve_posts(self, fb_group_id):
    """Fetch a group's posts, tag each new one, and store the parsable ones.

    The city configuration for ``fb_group_id`` is loaded from
    ``PersistenceController`` and the feed is requested from
    ``FBGroupsController`` with ``last_time_checked=self.time_interval``.
    A post is skipped when ``self.post_is_commented`` is true for its
    message or when persistence already holds its id.  Every remaining post
    is wrapped in a ``CaronaPost`` and run through all tag retrievers; it
    is saved with ``add_carona`` only when date, time, origin/destiny and
    ofereco/procuro tags were all found, otherwise it is printed and handed
    to ``self.log_not_parsed_post``.

    :param fb_group_id: id passed to both the persistence lookup and the
        feed controller.
    :return: None.
    """
    ## persistence
    persistence = PersistenceController()
    (city1, city1_state, city1_list,
     city2, city2_state, city2_list) = \
        persistence.get_cities_by_fb_group_id(fb_group_id)

    ## getting feed
    fb_manager = FBGroupsController(fb_group_id)
    feed = fb_manager.get_posts(last_time_checked=self.time_interval)

    for fb_post in feed:
        # NOTE(review): assumes every feed entry has 'message' and 'id'
        # keys -- confirm against FBGroupsController.get_posts.
        ## check if the post is not commented
        if self.post_is_commented(fb_post['message']):
            ## TODO: call logger
            continue
        ## check if it is already parsed
        if persistence.exists_post(fb_post['id']):
            ## TODO: call logger
            continue

        ## create new carona post
        carona_post = CaronaPost(fb_post)
        pprint(carona_post.content_clean)

        ## setting origin and destiny
        carona_post.city1 = city1
        carona_post.city1_state = city1_state
        carona_post.city2 = city2
        carona_post.city2_state = city2_state
        carona_post.city1_list = city1_list
        carona_post.city2_list = city2_list

        ## date / time
        has_date_tag = carona_post.retrieve_date_tags()
        carona_post.retrieve_time_tags()
        # The time requirement is judged from the resulting tag_time, not
        # from retrieve_time_tags()'s return value.
        has_time_tag = bool(carona_post.tag_time)

        ## origin_destiny
        has_origin_destiny = carona_post.retrieve_origin_destiny()

        ## oferecer/ procurar
        has_ofereco_procuro = carona_post.retrieve_ofereco_procuro_tag()

        ## [OPTIONAL] numero de vagas
        # Still called on every post; its result is not a requirement.
        carona_post.retrieve_vagas()

        ## check the tag requirements
        if (has_date_tag and has_time_tag
                and has_origin_destiny and has_ofereco_procuro):
            ## saving in the db
            persistence.add_carona(carona_post)
            continue

        print('*************** wrong')
        pprint(carona_post.content_clean)
        pprint(str(carona_post))
        print('*******************************************')
        self.log_not_parsed_post(carona_post)