def _tokenize_title(hostname, api_key):
    """Fetch every post of a blog and build inverted-index input records.

    Posts are requested from ``tumblr.getPosts`` in pages of 50 until the
    total reported by ``tumblr.getTotalPosts`` has been covered. A page
    whose fetch returns a falsy result contributes nothing and is skipped.

    Args:
        hostname: Blog identifier, passed through to the ``tumblr`` helpers.
        api_key: API key, passed through to the ``tumblr`` helpers.

    Returns:
        A list with one dict per post, holding:
            ``id``     -- the post id converted to ``str``
            ``title``  -- the original title string (``''`` when the post
                          has no ``title`` key)
            ``tokens`` -- result of ``_extract_nouns(title)``, or an empty
                          set when the title is falsy
            ``score``  -- seconds between the module-level ``EPOCH`` and
                          the post date
        The list is empty when the reported total is falsy.
    """
    _, total = tumblr.getTotalPosts(hostname, api_key)
    records = []
    if not total:
        return records

    page_size = 50
    for start in range(0, total, page_size):
        page = tumblr.getPosts(hostname, api_key, start, page_size)
        if not page:
            # Nothing usable for this page; carry on with the next one.
            continue

        for entry in page['response']['posts']:
            posted_at = datetime.strptime(entry['date'], '%Y-%m-%d %H:%M:%S %Z')
            # Newer posts get a higher score (provisional choice).
            # XXX The score could be assigned per (document, token) pair
            # instead of per document.
            recency = (posted_at - EPOCH).total_seconds()

            heading = entry.get('title', '')
            if heading:
                tokens = _extract_nouns(heading)
            else:
                tokens = set()

            # Document id, title (original string), tokens, score value.
            records.append({
                'id': str(entry['id']),
                'title': heading,
                'tokens': tokens,
                'score': recency,
            })

    return records
def updateAll(config, hostname):
    """Bring the Solr index in line with the posts currently on a blog.

    Every post is fetched in pages of 50, converted with ``_makeDoc`` and
    sent to Solr with ``_postToSolr``; a commit is issued after each page.
    Afterwards, ids returned by ``_getAllDocIds`` that were not seen during
    the crawl are removed with ``_deleteFromSolr`` and committed.

    The removal step runs only when every page was fetched successfully.
    If any page fetch returned a falsy result, the set of seen ids is
    incomplete, and purging against it would delete documents for posts
    that still exist. In that case the purge is skipped for this run and a
    warning is logged.

    Args:
        config: Object exposing ``get(section, option)``; the API key is
            read from section ``"Target"``, option ``"api_key"``.
        hostname: Blog identifier, passed through to the ``tumblr`` helpers.

    Returns:
        None. Nothing is done when the reported post total is falsy.
    """
    import logging

    api_key = config.get("Target", "api_key")
    (blog_name, total) = tumblr.getTotalPosts(hostname, api_key)
    if not total:
        return

    docids_new = set()
    fetch_failed = False
    offset = 0
    limit = 50

    # insert & update
    while offset < total:
        res = tumblr.getPosts(hostname, api_key, offset, limit)
        if res:
            for post in res['response']['posts']:
                doc = _makeDoc(post)
                docids_new.add(doc['id'])
                _postToSolr(doc)
            _commit()
        else:
            # Remember the gap: ids on this page are unknown, so the
            # "seen" set can no longer be trusted for deletions.
            fetch_failed = True
        offset += limit

    if fetch_failed:
        logging.getLogger(__name__).warning(
            "updateAll: at least one page of posts could not be fetched "
            "for %s; skipping removal of deleted documents", hostname)
        return

    # remove deleted documents
    docids_old = _getAllDocIds(blog_name)
    docids_deleted = docids_old - docids_new
    for docid in docids_deleted:
        _deleteFromSolr(str(docid))
    _commit()
except getopt.GetoptError as e: onError(1, str(e)) if len(sys.argv) == 1: # no options passed onError(2, 2) verbose = False keepGoing = False writeLog = False for option, argument in myopts: if option in ('-b', '--blog'): blog = argument elif option in ('-k', '--keepgoing'): keepGoing = True elif option in ('-l', '--log'): writeLog = True elif option in ('-v', '--verbose'): # verbose output verbose = True elif option in ('-h', '--help'): # display help text usage(0) mainDir, downloadDir, gifDir, videoDir = checkDirectories(defaultDownloadDir, subDir, blog, gifDir, videoDir, verbose) client = authenticateClient(verbose) posts = getPosts(client, blog, mainDir, downloadDir, gifDir, videoDir, keepGoing, writeLog, verbose)
except getopt.GetoptError as e: onError(1, str(e)) if len(sys.argv) == 1: # no options passed onError(2, 2) verbose = False keepGoing = False writeLog = False for option, argument in myopts: if option in ('-b', '--blog'): blog = argument elif option in ('-k', '--keepgoing'): keepGoing = True elif option in ('-l', '--log'): writeLog = True elif option in ('-v', '--verbose'): # verbose output verbose = True elif option in ('-h', '--help'): # display help text usage(0) mainDir, downloadDir, gifDir, videoDir = checkDirectories( defaultDownloadDir, subDir, blog, gifDir, videoDir, verbose) client = authenticateClient(verbose) posts = getPosts(client, blog, mainDir, downloadDir, gifDir, videoDir, keepGoing, writeLog, verbose)