def compare2versions(ljname='evo-lutio'):
    """Print the differences between the current and the saved post lists.

    Reads ``<ljname>/calendar/_<ljname>_.txt`` (the current list, "now") and
    ``<ljname>/calendar/_compare_.txt`` (the list to compare with, "bef") as
    sets of lines, then prints the size of each set, the size of both set
    differences and finally the differing lines themselves.

    Args:
        ljname: Journal name used to build both file paths.
    """
    current = set(lj.loadfromfile(
        '{0}/calendar/_{0}_.txt'.format(ljname), lines=True))
    previous = set(lj.loadfromfile(
        '{0}/calendar/_compare_.txt'.format(ljname), lines=True))

    added = current - previous      # lines only in the current list
    removed = previous - current    # lines only in the compared list

    print('now: {}'.format(len(current)))
    print('bef: {}'.format(len(previous)))
    print('now-bef: {}'.format(len(added)))
    # The count must describe the same difference that is listed below.
    print('bef-now: {}'.format(len(removed)))
    print('now-bef')
    print(list(added))
    print('bef-now')
    print(list(removed))
def uniqfilelines(filename, sort=False):
    """Write the unique, non-empty lines of a file to ``filename + '_u_.txt'``.

    Duplicates are dropped keeping the first occurrence, so the output order
    is deterministic (the order of first appearance in the input).

    Args:
        filename: Path of the file to read; the result is saved next to it
            with the suffix ``_u_.txt``.
        sort: When true, order the lines by post id, highest id first. The
            id is obtained with ``lj.extractpostid`` and converted to int.
    """
    seen = set()
    unique = []
    for line in lj.loadfromfile(filename, True):
        # Skip empty lines and anything already collected.
        if line and line not in seen:
            seen.add(line)
            unique.append(line)
    if sort:
        unique.sort(reverse=True, key=lambda url: int(lj.extractpostid(url)))
    lj.savetofile('\n'.join(unique), filename + '_u_.txt')
def parseljpost(url, forced: bool):
    """Turn the saved raw copy of one post into cleaned files under ``data/``.

    The raw page ``<ljname>/raw/<postid>.raw`` and its ``.json`` companion
    are loaded, tags and the journal name are added to the entry, the
    article is cleaned, and images/userpics are processed by the helper
    functions. The results go to ``<ljname>/data/<postid>.htm`` and
    ``<ljname>/data/<postid>.htm.json``.

    Args:
        url: Post URL; the journal name and post id are extracted from it.
        forced: When false, a post whose ``.htm.json`` already exists is
            left untouched.
    """
    post_id = lj.extractpostid(url)
    journal = lj.extractljname(url)
    raw_path = '{}/raw/{}.raw'.format(journal, post_id)
    html_path = '{}/data/{}.htm'.format(journal, post_id)
    json_path = '{}/data/{}.htm.json'.format(journal, post_id)

    # Already processed and no re-processing requested.
    if not forced and lj.checkfileexist(json_path):
        return
    # Nothing to do without the raw page.
    if not lj.checkfileexist(raw_path):
        return

    soup = BeautifulSoup(lj.loadfromfile(raw_path), features='html.parser')
    data = json.loads(lj.loadfromfile(raw_path + '.json'))
    data['entry']['tags'] = gettags(soup)
    data['entry']['ljname'] = journal

    body = cleanarticle(soup.find('article', class_='entry-content'))
    # The article is written once before the image helpers run ...
    lj.savetofile(body.prettify(), html_path)

    body = loadimagesa(journal, post_id, body)
    data['comments'] = loadimagesc(journal, post_id, data['comments'])
    data['comments'] = loaduserpics(journal, post_id, data['comments'])

    lj.savetofile(data, json_path)
    # ... and once more with whatever loadimagesa returned.
    lj.savetofile(body.prettify(), html_path)
def makeupdate(ljname: str, count: int):
    """Upload the newest posts of a journal and their assets over FTP.

    Takes the first ``count + 1`` entries of
    ``<ljname>/calendar/_<ljname>_.txt``, collects for them the post files
    (``.htm`` / ``.htm.json``), the userpics referenced by their comments and
    the post/comment images whose file name starts with ``<postid>-``, and
    hands each group to ``ftptransfer``. The calendar file itself is
    uploaded last.

    Args:
        ljname: Journal name; also the name of the local root folder.
        count: Number of new posts. A falsy value (0 / None) makes the
            function return immediately without doing anything.
    """
    if not count:
        return
    lj.title('make web update')

    def local_dir(sub):
        # Path of a sub-directory inside the journal's local folder.
        return '{ljname}/{dir}'.format(ljname=ljname, dir=sub)

    # NOTE(review): one entry more than `count` is taken — presumably so the
    # previously newest post is re-uploaded as well; confirm the intent.
    posts = lj.loadfromfile(
        '{0}/calendar/_{0}_.txt'.format(ljname), True)[:count + 1]

    files = []
    upics = []            # userpic file names, in first-seen order
    seen_upics = set()    # O(1) duplicate check for `upics`
    ids = set()           # post ids (strings) selected for upload
    for post in posts:
        postid = lj.extractpostid(post)
        ids.add(postid)
        jsondata = json.loads(lj.loadfromfile(
            '{dir}/{postid}.htm.json'.format(
                dir=local_dir('data'), postid=postid)))
        for comment in jsondata['comments']:
            userpic = comment.get('userpic')
            if not userpic:
                continue
            # Only the file name part of the userpic path is uploaded.
            picname = userpic.split('/')[-1]
            if picname not in seen_upics:
                seen_upics.add(picname)
                upics.append(picname)
        files += [postid + '.htm', postid + '.htm.json']
    print(len(upics))

    imgext = ('.jpg', '.png', '.gif', '.svg')

    def post_images(folder):
        # Image files in `folder` named '<postid>-...' for a selected post.
        return [name for name in os.listdir(folder)
                if name.endswith(imgext) and name.split('-', 1)[0] in ids]

    images = post_images(local_dir('images/'))
    cimages = post_images(local_dir('images/comments/'))

    # Every title announces the transfer that directly follows it.
    lj.title('uploading data to ftp...')
    ftptransfer(files, local_dir('data/'), 'data/', True)
    lj.title('uploading images to ftp...')
    ftptransfer(images, local_dir('images/'), 'images/', False)
    lj.title('uploading comment images to ftp...')
    ftptransfer(cimages, local_dir('images/comments/'),
                'images/comments/', False)
    lj.title('uploading userpics to ftp...')
    ftptransfer(upics, local_dir('images/userpics/'),
                'images/userpics/', False)
    ftptransfer(['_{}_.txt'.format(ljname)], local_dir('calendar/'),
                'calendar/', True)
    print('uploading is complete!')
def processing(ljname: str, skip=None, maxcount=None, forced=False):
    """Convert the raw copies of a journal's posts into processed data.

    Reads the post list ``<ljname>/calendar/_<ljname>_.txt`` and calls
    ``parseljpost`` for every entry in ``posts[skip:maxcount]``. Does
    nothing (apart from printing the title) when the list file is missing.

    Args:
        ljname: Journal name; also the name of the local root folder.
        skip: Slice start into the post list (int or None for "from the
            beginning").
        maxcount: Slice stop into the post list (int or None for "to the
            end").
        forced: Passed through to ``parseljpost``.
    """
    lj.title('processing raw -- to --> data')
    filename = '{0}/calendar/_{0}_.txt'.format(ljname)
    if not lj.checkfileexist(filename):
        return
    posts = lj.loadfromfile(filename, lines=True)
    # The defaults must be None: a slice bound has to be None or an integer.
    for post in posts[skip:maxcount]:
        print(post)
        # Each line is '<url> <title>'; only the URL is needed here.
        url = post.split(' ', 1)[0]
        parseljpost(url, forced)
        print('-' * 40)
    print(len(posts))
def testdata(ljname: str):
    """Check the navigation data of the raw ``.json`` files and repair it.

    For every ``*.json`` file in ``<ljname>/raw/`` whose ``prev`` or ``next``
    navigation value is shorter than 5 characters, the navigation is
    recomputed with ``lj.getljnavigation`` from the post list and, if the
    new ``next`` or ``prev`` value is longer than 5 characters, written back
    to the file. The outcome is printed for each such file.

    Args:
        ljname: Journal name; also the name of the local root folder.
    """
    lj.title('testing')
    rawdir = '{}/raw/'.format(ljname)
    posts = lj.loadfromfile('{0}/calendar/_{0}_.txt'.format(ljname), True)
    postsids = [lj.extractpostid(post) for post in posts]

    for name in os.listdir(rawdir):
        if not name.endswith('.json'):
            continue

        # File names start with the numeric post id.
        postid = int(name.split('.', 1)[0])
        path = rawdir + name
        jsondata = json.loads(lj.loadfromfile(path))
        nav = jsondata['entry']['nav']

        # Both links look complete: nothing to repair.
        if len(nav['prev']) >= 5 and len(nav['next']) >= 5:
            continue
        print('{} * {} * {}'.format(nav['prev'], postid, nav['next']))

        key = str(postid)
        if key not in postsids:
            print('Пост {} отсутствует в списке постов. Обновите список постов!'.format(postid))
            continue

        fixed = lj.getljnavigation(posts, postsids.index(key))
        if len(fixed['next']) > 5 or len(fixed['prev']) > 5:
            jsondata['entry']['nav'] = fixed
            lj.savetofile(jsondata, path, 'w')
            print('Навигация успешно исправлена: {} {} {}'.format(postid, fixed['prev'], fixed['next']))
        else:
            print('Не удалось исправить навигацию: {}'.format(postid))
def makehtml(ljname: str, skip=None, maxcount=None, mkposts=True, mkindex=True):
    """Generate HTML content from the processed post data.

    The comment-errors file ``<ljname>/calendar/_collapsed_.txt`` is emptied
    first. If the post list ``<ljname>/calendar/_<ljname>_.txt`` exists, the
    post pages and/or the index are generated and the comment-errors file is
    de-duplicated with ``uniqfilelines``.

    Args:
        ljname: Journal name; also the name of the local root folder.
        skip: Slice start into the post list for page generation.
        maxcount: Slice stop into the post list for page generation.
        mkposts: Generate the individual post pages via ``makepost``.
        mkindex: Generate the index via ``makeindex`` (always from the full
            post list, not the slice).
    """
    lj.title('generate posts .htmls')
    calendar = '{0}/calendar/_{0}_.txt'.format(ljname)
    collapsed = '{0}/calendar/_collapsed_.txt'.format(ljname)

    # Clear the comment errors file before a new run.
    lj.savetofile('', collapsed, 'w')

    if not lj.checkfileexist(calendar):
        return

    posts = lj.loadfromfile(calendar, lines=True)
    if mkposts:
        for line in posts[skip:maxcount]:
            print(line)
            # Each line is '<url> <title>'; makepost only takes the URL.
            makepost(line.split(' ', 1)[0])
    if mkindex:
        makeindex(ljname, posts)
    uniqfilelines(collapsed, sort=True)
def makeindex(ljname, posts):
    """Generate ``<ljname>/index.html`` listing all posts.

    ``_index_templ.html`` is split on ``~`` into four blocks: ``start``,
    ``ok``, ``none`` and ``end``. ``start`` is formatted with the journal
    name and a time stamp; for each post either ``ok`` (its page
    ``<ljname>/post/<postid>.html`` exists) or ``none`` (it does not) is
    formatted with the post id and title; ``end`` is appended verbatim.

    Args:
        ljname: Journal name; also the name of the local root folder.
        posts: Lines of the form ``'<url> <title>'``. Blank lines are
            skipped and a line without a title gets an empty title.
    """
    lj.title('generate html index')
    csstime = int(time.time())
    template = lj.loadfromfile('_index_templ.html').split('~')
    templateblocks = {
        'start': template[0],
        'ok': template[1],
        'none': template[2],
        'end': template[3],
    }
    fields = {'ljname': ljname, 'time': csstime}

    parts = [templateblocks['start'].format(**fields)]
    for item in posts:
        if not item.strip():
            # A blank line carries neither URL nor title.
            continue
        # partition never fails: a missing title simply becomes ''.
        url, _, title = item.partition(' ')
        postid = lj.extractpostid(url)
        exists = lj.checkfileexist('{}/post/{}.html'.format(ljname, postid))
        block = 'ok' if exists else 'none'
        parts.append(templateblocks[block].format(postid=postid, title=title))
    parts.append(templateblocks['end'])

    lj.savetofile(''.join(parts), '{}/index.html'.format(ljname))
def makepost(url):
    """Render one processed post, with its comment tree, to a static page.

    Reads ``<ljname>/data/<postid>.htm`` and the matching ``.htm.json``,
    fills the ``~``-separated blocks of ``_post_templ.html`` and writes the
    result to ``<ljname>/post/<postid>.html``. Nothing happens when the
    ``.htm`` file does not exist.

    Template blocks by index: 0 is emitted before the comments and 1 after
    them (both formatted with the post fields); 2 opens a nesting level,
    3 renders one comment, 4 closes a nesting level.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source — confirm it against the original layout.

    Args:
        url: Post URL; the journal name and post id are extracted from it.
    """
    # Passed to the template as `time`; presumably a cache-busting stamp —
    # confirm in the template.
    csstime = int(time.time())
    postid = lj.extractpostid(url)
    ljname = lj.extractljname(url)
    filename = '{}/data/{}.htm'.format(ljname, postid)
    if lj.checkfileexist(filename):
        article = BeautifulSoup(lj.loadfromfile(filename),
                                features='html.parser')
        jsondata = json.loads(lj.loadfromfile(filename + '.json'))
        template = lj.loadfromfile('_post_templ.html').split('~')
        templateblocks = {
            'ulopen': template[2],
            'comment': template[3],
            'ulclose': template[4]
        }
        entry = jsondata['entry']
        nav = entry['nav']
        # Navigation links are reduced to the ids of the neighbouring posts.
        next = lj.extractpostid(nav['next'])
        prev = lj.extractpostid(nav['prev'])
        fields = {
            'title': entry['title'],
            'article': article,
            'prev': prev,
            'prevtitle': nav['prevtitle'],
            'next': next,
            'nexttitle': nav['nexttitle'],
            'postid': entry['ditemid'],
            'ljname': entry['ljname'],
            'time': csstime
        }
        html = template[0].format(**fields)
        comments = jsondata['comments']
        # Maps a comment id (as str) to its nesting level.
        cids = dict()
        # Nesting level of the most recently rendered comment.
        currentlevel = 0
        for comment in comments:
            parent = comment.get('parent', None)
            dtalkid = comment.get('dtalkid')
            if dtalkid:
                dtalkid = str(dtalkid)
                if not parent:
                    # Top-level comment.
                    comment['level'] = 0
                else:
                    parent = str(parent)
                    # NOTE(review): raises KeyError when the parent was not
                    # registered earlier in this loop (e.g. it had no
                    # dtalkid) — confirm this cannot happen in the data.
                    comment['level'] = cids[parent] + 1
                cids[dtalkid] = comment['level']
                # Comments written by the journal owner get the 'author'
                # CSS class.
                cssclass = 'author' if comment['uname'] == jsondata['entry'][
                    'journal'] else ''
                cfields = {
                    'level': comment['level'],
                    'userpic': comment.get('userpic'),
                    'user': comment['uname'],
                    'article': comment.get('article'),
                    'class': cssclass,
                    'userlj': comment.get('commenter_journal_base')
                }
                # Open or close as many nesting blocks as the level changed.
                delta = comment['level'] - currentlevel
                if delta:
                    tag = templateblocks[
                        'ulopen'] if delta > 0 else templateblocks['ulclose']
                    html += tag * abs(delta)
                html += templateblocks['comment'].format(**cfields)
                currentlevel = comment['level']
            else:
                # A comment without an id is not rendered; presumably it is
                # a collapsed thread that has to be fetched separately —
                # confirm. The post is recorded for follow-up instead.
                print('Читать комменты: ' + url)
                lj.savetofile('{} {}\n'.format(url, entry['title']),
                              '{0}/calendar/_collapsed_.txt'.format(ljname),
                              'a')  # append the post to the comment errors file
        # NOTE(review): no 'ulclose' block is appended for levels still open
        # after the last comment — presumably template[1] copes with that;
        # confirm.
        html += template[1].format(**fields)
        # Turn absolute links into this journal (https and http) into
        # relative ones.
        html = html.replace('href="https://{}.livejournal.com/'.format(ljname),
                            'href="')
        html = html.replace('href="http://{}.livejournal.com/'.format(ljname),
                            'href="')
        lj.savetofile(html, '{}/post/{}.html'.format(ljname, postid))