def api_request(self, command, **kwargs):
    """Query ``/api/<command>`` and return the JSON-decoded response.

    A ``data`` keyword, when present, is removed from *kwargs*, passed
    through ``to_unicode`` and encoded as UTF-8 (unencodable characters
    replaced) to become the request body.  All remaining keywords are
    forwarded to ``self.buildurl`` along with the absolute API URL.

    Commands that do not start with ``applications`` also carry the
    authentication headers built below.

    Raises:
        ValueError: the response is not valid JSON; the raw response is
            the exception argument.
    """
    body = None
    if 'data' in kwargs:
        body = to_unicode(kwargs.pop('data')).encode('utf-8', 'replace')

    headers = {}
    if not command.startswith('applications'):
        # The token is the SHA-256 hex digest of username + APITOKEN + the
        # current date as formatted after local2utc().
        day = local2utc(datetime.now()).strftime('%Y-%m-%d')
        digest = sha256(self.username + self.APITOKEN + day).hexdigest()
        credentials = b64encode('%s:%s' % (self.username, self.password))
        headers['Authorization'] = 'Basic %s' % credentials
        headers['X-Platform'] = 'android'
        headers['X-Client-Version'] = self.APIVERSION
        headers['X-AUM-Token'] = digest

    target = self.buildurl(self.absurl('/api/%s' % command), **kwargs)
    if isinstance(target, unicode):
        target = target.encode('utf-8')

    response = self.openurl(self.request_class(target, body, headers))
    raw = response.read()
    try:
        return json.loads(raw)
    except ValueError:
        # Re-raise with the raw payload so the caller can see what came back.
        raise ValueError(raw)
def __init__(self, browser, url, tree):
    """Initialise the article from its URL and, optionally, its markup.

    Args:
        browser: forwarded to ``Content.__init__``.
        url: article URL; ``self.id`` is derived from it with ``url2id``.
        tree: element holding the article markup.  When None, only
            ``url`` and ``id`` are set.
    """
    Content.__init__(self, browser)
    self.url = url
    self.id = url2id(self.url)
    if tree is None:
        return

    header = tree.find('header')
    title_links = header.find('h1').xpath('.//a')
    self.title = u' — '.join([link.text for link in title_links])

    parser = self.browser.parser
    try:
        author_link = parser.select(header, 'a[rel=author]', 1)
    except BrokenPageError:
        # No author link found: fall back to the anonymous author.
        self.author = 'Anonyme'
        self.username = None
    else:
        self.author = unicode(author_link.text)
        self.username = unicode(author_link.attrib['href'].split('/')[2])

    content_node = parser.select(tree, 'div.content', 1)
    self.body = parser.tostring(content_node)

    try:
        stamp = parser.select(header, 'time', 1).attrib['datetime']
        # Only the part before "+" is parsed; the UTC offset is dropped.
        self.date = datetime.strptime(stamp.split('+')[0], '%Y-%m-%dT%H:%M:%S')
        self.date = local2utc(self.date)
    except BrokenPageError:
        pass

    for form in parser.select(tree.find('footer'), 'form.button_to'):
        action = form.attrib['action']
        if not action.endswith('/for'):
            continue
        # rstrip() strips character sets; with the "/for" guard above it
        # removes exactly the trailing "for" and stops at the slash.
        self.relevance_url = action.rstrip('for').rstrip('against')
        token_input = parser.select(form, 'input[name=authenticity_token]', 1)
        self.relevance_token = token_input.attrib['value']

    score_node = parser.select(tree, 'div.figures figure.score', 1)
    self.score = int(score_node.text)
def parse(self):
    """Fill this object's fields from the markup held in ``self.div``.

    Sets ``url``, ``title``, ``author``, ``username``, ``date``, ``body``
    and ``score``; ``signature`` is set only when a ``p.signature`` element
    exists, and ``relevance_url`` / ``relevance_token`` only when the footer
    contains at least one ``form.button_to``.
    """
    select = self.browser.parser.select
    tostring = self.browser.parser.tostring

    self.url = '%s#%s' % (self.preurl, self.div.attrib['id'])
    self.title = unicode(select(self.div.find('h2'), 'a.title', 1).text)

    meta = self.div.find('p')
    try:
        a = select(meta, 'a[rel=author]', 1)
    except BrokenPageError:
        # No author link found: fall back to the anonymous author.
        self.author = 'Anonyme'
        self.username = None
    else:
        self.author = unicode(a.text)
        self.username = unicode(a.attrib['href'].split('/')[2])

    # Only the part before "+" is parsed; the UTC offset is dropped.
    stamp = select(meta, 'time', 1).attrib['datetime'].split('+')[0]
    self.date = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S')
    self.date = local2utc(self.date)

    content = self.div.find('div')
    try:
        signature = select(content, 'p.signature', 1)
    except BrokenPageError:
        # No signature.
        pass
    else:
        # Detach the signature so it is not repeated inside the body.
        content.remove(signature)
        self.signature = tostring(signature)
    self.body = tostring(content)

    self.score = int(select(meta, 'span.score', 1).text)

    forms = select(self.div.find('footer'), 'form.button_to')
    if len(forms) > 0:
        action = forms[0].attrib['action']
        # str.rstrip() removes a *set of characters*, not a suffix, so it
        # could eat into an unrelated path; strip the vote verb explicitly.
        for verb in ('for', 'against'):
            if action.endswith(verb):
                action = action[:-len(verb)]
                break
        self.relevance_url = action
        self.relevance_token = select(
            forms[0], 'input[name=authenticity_token]', 1).attrib['value']
def __init__(self, browser, url, tree):
    """Create the article; parse *tree* when one is supplied.

    Args:
        browser: handed to ``Content.__init__``.
        url: article URL, also used to compute ``self.id`` via ``url2id``.
        tree: article markup element, or None to skip parsing entirely.
    """
    Content.__init__(self, browser)
    self.url = url
    self.id = url2id(self.url)

    if tree is None:
        return

    header = tree.find("header")
    self.title = u" — ".join([anchor.text for anchor in header.find("h1").xpath(".//a")])

    select = self.browser.parser.select
    try:
        anchor = select(header, "a[rel=author]", 1)
    except BrokenPageError:
        # Author link missing: use the anonymous placeholder.
        self.author = "Anonyme"
        self.username = None
    else:
        self.author = unicode(anchor.text)
        self.username = unicode(anchor.attrib["href"].split("/")[2])

    self.body = self.browser.parser.tostring(select(tree, "div.content", 1))

    try:
        raw_date = select(header, "time", 1).attrib["datetime"]
        # The "+HH:MM" offset is cut off before parsing.
        self.date = datetime.strptime(raw_date.split("+")[0], "%Y-%m-%dT%H:%M:%S")
        self.date = local2utc(self.date)
    except BrokenPageError:
        pass

    footer = tree.find("footer")
    for vote_form in select(footer, "form.button_to"):
        target = vote_form.attrib["action"]
        if target.endswith("/for"):
            # With the "/for" guard, the rstrip() chain removes just the
            # trailing "for" (it stops at the slash).
            self.relevance_url = target.rstrip("for").rstrip("against")
            self.relevance_token = select(
                vote_form, "input[name=authenticity_token]", 1).attrib["value"]

    self.score = int(select(tree, "div.figures figure.score", 1).text)
def api_request(self, command, **kwargs):
    """Send a request to ``/api/<command>`` and decode the JSON answer.

    ``data`` (optional keyword) is popped from *kwargs* and becomes the
    UTF-8 encoded request body; the other keywords go to ``self.buildurl``.
    Unless *command* starts with ``applications``, authentication headers
    are attached to the request.

    Raises:
        ValueError: the response body cannot be decoded as JSON; the raw
            body is passed as the exception argument.
    """
    if 'data' in kwargs:
        payload = to_unicode(kwargs.pop('data')).encode('utf-8', 'replace')
    else:
        payload = None

    headers = {}
    if not command.startswith('applications'):
        today = local2utc(datetime.now()).strftime('%Y-%m-%d')
        # SHA-256 hex digest of username + APITOKEN + today's date.
        token = sha256(self.username + self.APITOKEN + today).hexdigest()
        basic = b64encode('%s:%s' % (self.username, self.password))
        headers.update({
            'Authorization': 'Basic %s' % basic,
            'X-Platform': 'android',
            'X-Client-Version': self.APIVERSION,
            'X-AUM-Token': token,
        })

    api_url = self.absurl('/api/%s' % command)
    full_url = self.buildurl(api_url, **kwargs)
    if isinstance(full_url, unicode):
        full_url = full_url.encode('utf-8')

    request = self.request_class(full_url, payload, headers)
    answer = self.openurl(request).read()
    try:
        decoded = json.loads(answer)
    except ValueError:
        # Surface the undecodable body to the caller.
        raise ValueError(answer)
    return decoded
def __init__(self, browser, url, tree):
    """Build the article from its URL and, when available, its markup.

    Args:
        browser: forwarded to the parent constructor.
        url: article URL; ``self.id`` is derived from it with ``url2id``.
        tree: element containing the article markup, or None to leave
            everything except ``url`` and ``id`` unset.
    """
    super(Article, self).__init__(browser)
    self.url = url
    self.id = url2id(self.url)

    if tree is None:
        return

    header = tree.find('header')
    self.title = u' — '.join([a.text for a in header.find('h1').xpath('.//a')])

    try:
        a = header.xpath('.//a[@rel="author"]')[0]
    except IndexError:
        # No author link found: fall back to the anonymous author.
        self.author = 'Anonyme'
        self.username = None
    else:
        self.author = unicode(a.text)
        self.username = unicode(a.attrib['href'].split('/')[2])

    content = tree.xpath('.//div[has-class("content")]')[0]
    self.body = lxml.html.tostring(content).decode('utf-8')

    try:
        # Only the part before "+" is parsed; the UTC offset is dropped.
        stamp = header.xpath('.//time')[0].attrib['datetime'].split('+')[0]
        self.date = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S')
        self.date = local2utc(self.date)
    except IndexError:
        pass

    # ".//" keeps the search inside <footer>.  A leading "//" is evaluated
    # from the document root and would match forms anywhere on the page.
    for form in tree.find('footer').xpath('.//form[has-class("button_to")]'):
        action = form.attrib['action']
        if action.endswith('/for'):
            # Remove the trailing vote verb, keeping the final slash.
            self.relevance_url = action[:-len('for')]
            self.relevance_token = form.xpath(
                './/input[@name="authenticity_token"]')[0].attrib['value']

    score = tree.xpath('.//div[has-class("figures")]//figure[has-class("score")]')[0]
    self.score = int(score.text)
def iter_threads(self):
    """Yield one discussion ``Thread`` per entry of ``browser.get_threads()``.

    Each thread gets the entry's ``id``, the ``IS_DISCUSSION`` flag, a title
    built from the entry's ``name`` and a date taken from the last message's
    ``utc_timestamp``.
    """
    for entry in self.browser.get_threads():
        discussion = Thread(entry['id'])
        discussion.flags = Thread.IS_DISCUSSION
        discussion.title = u'Discussion with %s' % entry['name']
        last_stamp = entry['last_message']['utc_timestamp']
        discussion.date = local2utc(datetime.datetime.fromtimestamp(last_stamp))
        yield discussion
def parse(self):
    """Populate this object from the markup stored in ``self.div``.

    Sets ``url``, ``title``, ``author``, ``username``, ``date``, ``body``
    and ``score``.  ``signature`` is set only if a ``p.signature`` element
    is found; ``relevance_url`` and ``relevance_token`` only if the footer
    holds at least one ``form.button_to``.
    """
    parser = self.browser.parser

    self.url = '%s#%s' % (self.preurl, self.div.attrib['id'])
    self.title = unicode(parser.select(self.div.find('h2'), 'a.title', 1).text)

    info = self.div.find('p')
    try:
        a = parser.select(info, 'a[rel=author]', 1)
    except BrokenPageError:
        # Author link missing: use the anonymous placeholder.
        self.author = 'Anonyme'
        self.username = None
    else:
        self.author = unicode(a.text)
        self.username = unicode(a.attrib['href'].split('/')[2])

    # The "+HH:MM" offset is cut off before parsing.
    raw_date = parser.select(info, 'time', 1).attrib['datetime']
    self.date = datetime.strptime(raw_date.split('+')[0], '%Y-%m-%dT%H:%M:%S')
    self.date = local2utc(self.date)

    content = self.div.find('div')
    try:
        signature = parser.select(content, 'p.signature', 1)
    except BrokenPageError:
        # No signature.
        pass
    else:
        # Take the signature out of the content before serialising the body.
        content.remove(signature)
        self.signature = parser.tostring(signature)
    self.body = parser.tostring(content)

    self.score = int(parser.select(info, 'span.score', 1).text)

    forms = parser.select(self.div.find('footer'), 'form.button_to')
    if len(forms) > 0:
        first = forms[0]
        action = first.attrib['action']
        # rstrip('for').rstrip('against') strips character sets rather than
        # suffixes and can mangle other endings; remove the verb explicitly.
        for verb in ('for', 'against'):
            if action.endswith(verb):
                action = action[:-len(verb)]
                break
        self.relevance_url = action
        self.relevance_token = parser.select(
            first, 'input[name=authenticity_token]', 1).attrib['value']
def get_thread_mails(self):
    """Extract the correspondent's pseudo and the messages of a thread page.

    Returns:
        dict with ``member`` (a dict holding ``pseudo``, ``'Unknown'`` when
        the name element is absent) and ``messages`` (a list of dicts with
        ``date``, ``message`` and ``id_from``), built by walking the page's
        message items in reverse document order.
    """
    try:
        name_node = self.document.getroot().cssselect(
            'div#message_heading div.username span.name')[0]
        pseudo = self.parser.tocleanstring(name_node)
    except IndexError:
        pseudo = 'Unknown'

    messages = []
    items = self.document.xpath('//ul[@id="thread"]//li[contains(@id, "message_")]')
    for item in reversed(items):
        try:
            html = self.parser.tostring(item.xpath('.//div[@class="message_body"]')[0])
        except IndexError:
            # 'Match' message
            continue

        text = html2text(html).strip()

        # The timestamp is the first run of digits followed by ", " in the
        # script embedded in the timestamp span.
        script = item.xpath('.//span[@class="timestamp"]//script')[0]
        found = re.search(r'(\d+), ', script.text)
        assert found
        when = local2utc(datetime.fromtimestamp(int(found.group(1))))

        # Sender id: last path component of the first link, query removed.
        sender = item.find('a').attrib['href'].split('/')[-1].split('?')[0]

        messages.append({
            'date': when,
            'message': unicode(text),
            'id_from': unicode(sender),
        })

    return {
        'member': {'pseudo': pseudo},
        'messages': messages,
    }
def parse(self):
    """Fill this object's fields from the markup held in ``self.div``.

    Sets ``url``, ``title``, ``author``, ``username``, ``date``, ``body``
    and ``score``; ``signature`` is set only when a signature paragraph
    exists, and ``relevance_url`` / ``relevance_token`` only when the footer
    contains at least one ``button_to`` form.
    """
    self.url = '%s#%s' % (self.preurl, self.div.attrib['id'])
    self.title = unicode(self.div.find('h2').xpath('.//a[has-class("title")]')[0].text)

    meta = self.div.find('p')
    try:
        a = meta.xpath('.//a[@rel="author"]')[0]
    except IndexError:
        # No author link found: fall back to the anonymous author.
        self.author = 'Anonyme'
        self.username = None
    else:
        self.author = unicode(a.text)
        self.username = unicode(a.attrib['href'].split('/')[2])

    # Only the part before "+" is parsed; the UTC offset is dropped.
    stamp = meta.xpath('.//time')[0].attrib['datetime'].split('+')[0]
    self.date = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S')
    self.date = local2utc(self.date)

    content = self.div.find('div')
    try:
        signature = content.xpath('.//p[has-class("signature")]')[0]
    except IndexError:
        # No signature.
        pass
    else:
        # Detach the signature so it is not repeated inside the body.
        content.remove(signature)
        self.signature = lxml.html.tostring(signature).decode('utf-8')
    self.body = lxml.html.tostring(content).decode('utf-8')

    self.score = int(meta.xpath('.//span[has-class("score")]')[0].text)

    forms = self.div.find('footer').xpath('.//form[has-class("button_to")]')
    if forms:
        action = forms[0].attrib['action']
        # str.rstrip() removes a *set of characters*, not a suffix, so it
        # could eat into an unrelated path; strip the vote verb explicitly.
        for verb in ('for', 'against'):
            if action.endswith(verb):
                action = action[:-len(verb)]
                break
        self.relevance_url = action
        self.relevance_token = forms[0].xpath(
            './/input[@name="authenticity_token"]')[0].attrib['value']
def iter_threads(self):
    """Yield one discussion ``Thread`` per entry of ``browser.get_threads()``.

    The title names a participant whose id differs from
    ``self.browser.my_id`` (the last such participant wins); it is left
    untouched when there is none.  The date comes from the entry's
    ``modification_date``.
    """
    my_id = self.browser.my_id
    for entry in self.browser.get_threads():
        discussion = Thread(entry['id'])
        discussion.flags = Thread.IS_DISCUSSION

        for participant in entry['participants']:
            profile = participant['user']
            if profile['id'] == my_id:
                continue
            discussion.title = u'Discussion with %s' % profile['display_name']

        modified = parse_date(entry['modification_date'])
        discussion.date = local2utc(modified)
        yield discussion
# French month spellings and the English abbreviation substituted for each.
# Order matters: u'Ao\xfbt' ("Août") must come before its own prefix u'Aoû',
# otherwise "Août" is rewritten to "Augt" and the longer pattern never matches.
_FRENCH_MONTHS = (
    (u'Fév', 'Feb'),
    (u'Avr', 'Apr'),
    (u'Mai', 'May'),
    (u'Juin', 'Jun'),
    (u'Juil', 'Jul'),
    (u'Ao\xfbt', 'Aug'),
    (u'Aoû', 'Aug'),
    (u'Déc', 'Dec'),
)


def parse_date(s):
    """Parse a date string that may contain French month names.

    The French spellings listed in ``_FRENCH_MONTHS`` are replaced by
    English abbreviations, then the string is handed to ``_parse_dt`` and the
    result is passed through ``local2utc``.

    Args:
        s: unicode date string.

    Returns:
        Whatever ``local2utc`` returns for the parsed value.
    """
    for french, english in _FRENCH_MONTHS:
        s = s.replace(french, english)
    return local2utc(_parse_dt(s))
def iter_threads(self):
    """Generate a discussion ``Thread`` for every browser thread.

    For each entry returned by ``self.browser.get_threads()``: the thread id
    is the entry's ``id``, the title mentions the display name of a
    participant other than ``self.browser.my_id`` (if several, the last one),
    and the date is derived from ``modification_date``.
    """
    for info in self.browser.get_threads():
        result = Thread(info['id'])
        result.flags = Thread.IS_DISCUSSION

        others = (p for p in info['participants']
                  if p['user']['id'] != self.browser.my_id)
        for other in others:
            result.title = u'Discussion with %s' % other['user']['display_name']

        result.date = local2utc(parse_date(info['modification_date']))
        yield result
def build_request(self, url, *args, **kwargs):
    """Attach the authentication headers, then defer to the parent class.

    A ``headers`` dict is created in *kwargs* when missing.  Unless *url*
    contains ``applications``, the Authorization, X-Platform,
    X-Client-Version and X-AUM-Token headers are written into it.  The
    request itself is built by the parent ``build_request``.
    """
    headers = kwargs.setdefault('headers', {})

    if 'applications' not in url:
        day = local2utc(datetime.now()).strftime('%Y-%m-%d')
        # SHA-256 hex digest of username + APITOKEN + the formatted date.
        seed = (self.username + self.APITOKEN + day).encode('utf-8')
        token = sha256(seed).hexdigest()

        userpass = b'%s:%s' % (self.username.encode('utf-8'),
                               self.password.encode('utf-8'))
        headers['Authorization'] = 'Basic %s' % b64encode(userpass).decode('utf-8')
        headers['X-Platform'] = 'android'
        headers['X-Client-Version'] = self.APIVERSION
        headers['X-AUM-Token'] = token

    return super(AuMBrowser, self).build_request(url, *args, **kwargs)
def get_thread(self, thread):
    """Load the messages exchanged with one user into *thread*.

    Args:
        thread: a ``Thread``, or a bare identifier from which a new
            ``Thread`` flagged ``IS_DISCUSSION`` is created.

    Returns:
        The thread, with ``title`` set and ``root`` pointing at the last
        message yielded by ``browser.get_thread_messages`` (each message is
        made the parent of the one before it); ``root`` is None when there
        are no messages.
    """
    if not isinstance(thread, Thread):
        thread = Thread(thread)
        thread.flags = Thread.IS_DISCUSSION

    user = self.browser.get_user(thread.id)
    thread.title = u'Discussion with %s' % user['name']

    contact = self.storage.get('contacts', thread.id, default={'lastmsg': 0})

    photos = [user['photo_host'] + photo['large'] for photo in user['photos']]
    signature = u'Age: %s' % user['age']
    signature += u'\nLast online: %s' % user['last_online']
    signature += u'\nPhotos:\n\t%s' % '\n\t'.join(photos)

    previous = None
    for raw in self.browser.get_thread_messages(thread.id):
        # Anything newer than the stored 'lastmsg' marker counts as unread.
        if int(contact['lastmsg']) < raw['utc_timestamp']:
            flags = Message.IS_UNREAD
        else:
            flags = 0

        kind = raw['type']
        if kind == 'msg':
            content = unicode(raw['msg'])
        elif kind == 'new_challenge':
            content = u'A new challenge has been proposed!'
        elif kind == 'serie':
            content = u"I've played"
        elif kind == 'end_game':
            if raw['score']['w'] == self.browser.my_id:
                winner = self.browser.my_name
            else:
                winner = user['name']
            content = u'%s is the winner! (%s VS %s)' % (
                winner, raw['score']['s'][0], raw['score']['s'][1])
        else:
            content = u'Unknown action: %s' % kind

        sent_by_me = raw['from'] == self.browser.my_id
        message = Message(
            thread=thread,
            id=raw['utc_timestamp'],
            title=thread.title,
            sender=unicode(self.browser.my_name if sent_by_me else user['name']),
            receivers=[unicode(self.browser.my_name if not sent_by_me else user['name'])],
            date=local2utc(datetime.datetime.fromtimestamp(raw['utc_timestamp'])),
            content=content,
            children=[],
            parent=None,
            # The correspondent's signature is attached only to their own messages.
            signature=signature if not sent_by_me else u'',
            flags=flags)

        if previous:
            message.children.append(previous)
            previous.parent = message
        previous = message

    thread.root = previous
    return thread
def get_thread(self, thread):
    """Load a conversation and attach its messages to *thread*.

    Args:
        thread: a ``Thread``, or a bare identifier from which a new
            ``Thread`` flagged ``IS_DISCUSSION`` is created.

    Returns:
        The thread, titled after the other participant, with ``root`` set to
        the last entry of the conversation's ``messages`` (each message is
        made the parent of the one before it); ``root`` is None when there
        are no messages.
    """
    if not isinstance(thread, Thread):
        thread = Thread(thread)
        thread.flags = Thread.IS_DISCUSSION

    info = self.browser.get_thread(thread.id)

    # Split participants into the logged-in account and the correspondent.
    for participant in info['participants']:
        profile = participant['user']
        if profile['id'] == self.browser.my_id:
            me = HappnContact(profile)
        else:
            other = HappnContact(profile)

    thread.title = u'Discussion with %s' % other.name

    contact = self.storage.get('contacts', thread.id, default={'lastmsg': 0})

    previous = None
    for raw in info['messages']:
        # Messages with an id above the stored 'lastmsg' marker are unread.
        flags = Message.IS_UNREAD if int(contact['lastmsg']) < int(raw['id']) else 0

        if raw['sender']['id'] == me.id:
            sender, receiver = me, other
        else:
            sender, receiver = other, me

        message = Message(
            thread=thread,
            id=raw['id'],
            title=thread.title,
            sender=sender.name,
            receivers=[receiver.name],
            date=local2utc(parse_date(raw['creation_date'])),
            content=raw['message'],
            children=[],
            parent=None,
            signature=sender.get_text(),
            flags=flags)

        if previous:
            message.children.append(previous)
            previous.parent = message
        previous = message

    thread.root = previous
    return thread
def parse_dt(s):
    """Convert a human-readable timestamp and pass it through ``local2utc``.

    Recognised inputs:
      * ``None``: the current time is used;
      * ``"<N> minutes ago"``: N minutes before now;
      * ``"<day> – <hour>"``, e.g. ``"Yesterday – 20:45"``: <day> is
        "Yesterday", "Today", or any other text, which is then handed to
        ``_parse_dt``; the hour and minute come from ``_parse_dt(<hour>)``;
      * anything else, e.g. ``"Dec 28, 2011"``: handed to ``_parse_dt``
        unchanged.

    Args:
        s: unicode string or None.

    Returns:
        Whatever ``local2utc`` returns for the computed datetime.
    """
    now = datetime.datetime.now()
    if s is None:
        return local2utc(now)

    if 'minutes ago' in s:
        d = now - datetime.timedelta(minutes=int(s.split()[0]))
    elif u'–' in s:
        # Date in form : "Yesterday – 20:45"
        day, hour = [part.strip() for part in s.split(u'–')]
        if day == 'Yesterday':
            base = now - datetime.timedelta(days=1)
        elif day == 'Today':
            base = now
        else:
            # Any other day label used to leave the date unbound and crash
            # with UnboundLocalError; let the generic parser handle it.
            base = _parse_dt(day)
        clock = _parse_dt(hour)
        d = datetime.datetime(base.year, base.month, base.day,
                              clock.hour, clock.minute)
    else:
        # Date in form : "Dec 28, 2011"
        d = _parse_dt(s)

    return local2utc(d)
def __init__(self, browser, url, tree):
    """Create the article; parse *tree* when one is supplied.

    Args:
        browser: handed to the parent constructor.
        url: article URL, also used to compute ``self.id`` via ``url2id``.
        tree: article markup element, or None to skip parsing so that only
            ``url`` and ``id`` are set.
    """
    super(Article, self).__init__(browser)
    self.url = url
    self.id = url2id(self.url)

    if tree is None:
        return

    header = tree.find('header')
    self.title = u' — '.join(
        [a.text for a in header.find('h1').xpath('.//a')])

    try:
        a = header.xpath('.//a[@rel="author"]')[0]
    except IndexError:
        # Author link missing: use the anonymous placeholder.
        self.author = 'Anonyme'
        self.username = None
    else:
        self.author = unicode(a.text)
        self.username = unicode(a.attrib['href'].split('/')[2])

    self.body = lxml.html.tostring(
        tree.xpath('.//div[has-class("content")]')[0]).decode('utf-8')

    try:
        # The "+HH:MM" offset is cut off before parsing.
        self.date = datetime.strptime(
            header.xpath('.//time')[0].attrib['datetime'].split('+')[0],
            '%Y-%m-%dT%H:%M:%S')
        self.date = local2utc(self.date)
    except IndexError:
        pass

    # The expression must start with ".//" to stay within <footer>: an
    # XPath beginning with "//" is resolved from the document root, so it
    # would also pick up forms located outside this article's footer.
    footer = tree.find('footer')
    for form in footer.xpath('.//form[has-class("button_to")]'):
        action = form.attrib['action']
        if not action.endswith('/for'):
            continue
        # Cut the trailing vote verb, keeping the final slash.
        self.relevance_url = action[:-len('for')]
        self.relevance_token = form.xpath(
            './/input[@name="authenticity_token"]')[0].attrib['value']

    self.score = int(
        tree.xpath(
            './/div[has-class("figures")]//figure[has-class("score")]')
        [0].text)
def get_thread_mails(self):
    """Collect the correspondent's pseudo and the thread's messages.

    Returns:
        dict with two keys: ``member`` (dict with ``pseudo``, set to
        ``'Unknown'`` if the name element cannot be found) and ``messages``
        (list of dicts with ``date``, ``message``, ``id_from``).  Message
        items are visited in reverse document order; items without a
        ``message_body`` div are skipped.
    """
    result = {
        'member': {},
        'messages': [],
    }

    try:
        heading = self.document.getroot().cssselect(
            '#message_heading div.username span.name')
        result['member']['pseudo'] = self.parser.tocleanstring(heading[0])
    except IndexError:
        result['member']['pseudo'] = 'Unknown'

    entries = self.document.xpath(
        '//ul[@id="thread"]//li[contains(@id, "message_")]')
    for entry in reversed(entries):
        bodies = entry.xpath('.//div[@class="message_body"]')
        try:
            markup = self.parser.tostring(bodies[0])
        except IndexError:
            # 'Match' message
            continue
        text = html2text(markup).strip()

        # The timestamp is the first run of digits followed by ", " in the
        # script embedded in the timestamp span.
        script_text = entry.xpath('.//span[@class="timestamp"]//script')[0].text
        match = re.search(r'(\d+), ', script_text)
        assert match
        seconds = int(match.group(1))
        date = local2utc(datetime.fromtimestamp(seconds))

        # Sender id: last path component of the first link, without query.
        href = entry.find('a').attrib['href']
        id_from = href.split('/')[-1].split('?')[0]

        result['messages'].append({
            'date': date,
            'message': unicode(text),
            'id_from': unicode(id_from),
        })

    return result
def parse(self):
    """Populate this object from the markup stored in ``self.div``.

    Sets ``url``, ``title``, ``author``, ``username``, ``date``, ``body``
    and ``score``.  ``signature`` is set only if a signature paragraph is
    found; ``relevance_url`` and ``relevance_token`` only if the footer
    holds at least one ``button_to`` form.
    """
    self.url = '%s#%s' % (self.preurl, self.div.attrib['id'])
    self.title = unicode(
        self.div.find('h2').xpath('.//a[has-class("title")]')[0].text)

    info = self.div.find('p')
    try:
        a = info.xpath('.//a[@rel="author"]')[0]
    except IndexError:
        # Author link missing: use the anonymous placeholder.
        self.author = 'Anonyme'
        self.username = None
    else:
        self.author = unicode(a.text)
        self.username = unicode(a.attrib['href'].split('/')[2])

    # The "+HH:MM" offset is cut off before parsing.
    raw_date = info.xpath('.//time')[0].attrib['datetime']
    self.date = datetime.strptime(raw_date.split('+')[0], '%Y-%m-%dT%H:%M:%S')
    self.date = local2utc(self.date)

    content = self.div.find('div')
    try:
        signature = content.xpath('.//p[has-class("signature")]')[0]
    except IndexError:
        # No signature.
        pass
    else:
        # Take the signature out of the content before serialising the body.
        content.remove(signature)
        self.signature = lxml.html.tostring(signature).decode('utf-8')
    self.body = lxml.html.tostring(content).decode('utf-8')

    self.score = int(info.xpath('.//span[has-class("score")]')[0].text)

    forms = self.div.find('footer').xpath(
        './/form[has-class("button_to")]')
    if forms:
        first = forms[0]
        action = first.attrib['action']
        # rstrip('for').rstrip('against') strips character sets rather than
        # suffixes and can mangle other endings; remove the verb explicitly.
        for verb in ('for', 'against'):
            if action.endswith(verb):
                action = action[:-len(verb)]
                break
        self.relevance_url = action
        self.relevance_token = first.xpath(
            './/input[@name="authenticity_token"]')[0].attrib['value']
def parse_dt(s):
    """Parse *s* with ``_parse_dt`` and pass the result through ``local2utc``."""
    return local2utc(_parse_dt(s))