def getCommentTree(nodes, url, linkid, commentid, args, depth): global ccount for node in nodes: try: if node is None: break elif node['kind'] == 't1': try: cur.execute("""replace into t1 ( id, link_id, parent_id, body, author, created, last_seen ) values (%s, %s, %s, %s, %s, %s, now())""", ( lib.base36decode(node['data']['id']), node['data']['link_id'], node['data']['parent_id'], node['data']['body'], node['data']['author'], datetime.datetime.fromtimestamp(node['data']['created_utc']) )) db.commit() ccount += 1 if node['data']['replies'] != "": getCommentTree([node['data']['replies']], url, linkid, commentid, args, depth) except Exception, e: log.write('Error storing t1_' + node['data']['id'] + ': %s' % e, 'error') db.rollback() elif node['kind'] == "Listing": getCommentTree(node['data']['children'], url, linkid, commentid, args, depth) elif node['kind'] == "more": if _['autoget_lte_20'] and node['data']['count'] <= 20 and node['data']['count'] >= _['autoget_threshold']: children = ",".join(node['data']['children']) time.sleep(_['sleep']) get('http://www.reddit.com/api/morechildren/', linkid, "", "api_type=json&depth=8&link_id=%s&children=%s" % (linkid, children), 0, True) elif node['data']['count'] >= _['comment_traverse_threshold']: if node['data']['parent_id'] == linkid or node['data']['parent_id'] == commentid: #sibling traversal breadth = 0 for child in node['data']['children']: if breadth >= _['comment_siblings_total']: break time.sleep(_['sleep']) get(url, linkid, child, args, depth) breadth += 1 else: #child traversal time.sleep(_['sleep']) get(url, linkid, node['data']['parent_id'][3:], args, depth + 1)
rJSON = f.read() f.close() try: links = json.loads(rJSON) except Exception, e: log.write('Error parsing links url: %s - %s' % (finalUrl, e), 'error') return after = links['data']['after'] for l in links['data']['children']: try: if l['kind'] == 't3': try: cur.execute("select id from t3 where id = %s", (lib.base36decode(l['data']['id']),)) if cur.rowcount > 0: cur.execute("update t3 set last_seen = now() where id = %s", (lib.base36decode(l['data']['id']),)) else: if l['data']['is_self']: content = l['data']['selftext'] else: content = None; cur.execute("""insert into t3 ( id, title, url, permalink, content, author, created, last_seen,